User Inputs

# Unpack the report parameters (`params` comes from the Rmd YAML header) into
# top-level variables used throughout the script.
output.var = params$output.var 

# NOTE(review): transform.abs and norm.pred are hard-coded here and override
# any value supplied via params — confirm this is intentional.
transform.abs = FALSE
log.pred = params$log.pred   # TRUE -> model log10(output.var)
norm.pred = FALSE            # TRUE -> model the bestNormalize-transformed output
eda = params$eda             # TRUE -> run the exploratory-data-analysis chunks
algo.forward.caret = params$algo.forward.caret
algo.backward.caret = params$algo.backward.caret
algo.stepwise.caret = params$algo.stepwise.caret
algo.LASSO.caret = params$algo.LASSO.caret
algo.LARS.caret = params$algo.LARS.caret
# Echo the full parameter list so the rendered report is self-documenting.
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 8
##  $ output.var         : chr "y3"
##  $ log.pred           : logi TRUE
##  $ eda                : logi FALSE
##  $ algo.forward.caret : logi TRUE
##  $ algo.backward.caret: logi TRUE
##  $ algo.stepwise.caret: logi TRUE
##  $ algo.LASSO.caret   : logi TRUE
##  $ algo.LARS.caret    : logi TRUE
# Setup Labels
# Name of the (possibly transformed) response column: "<var>.log" when the
# response is modelled on the log10 scale, otherwise the raw variable name.
# BUG FIX: the original `else output.var.tr = output.var` nested an assignment
# inside the if/else expression, which is not valid R (assignment to an `if`
# call); the plain value is the correct else branch.
output.var.tr = if (log.pred) paste0(output.var, '.log') else output.var

Loading Data

# Features and labels live in separate CSVs keyed by JobName.
feat  = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
# Every column of the feature file except the JobName key is a predictor.
predictors = names(dplyr::select(feat,-JobName))
# Keep only jobs present in both files.
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')

Data validation

# Split complete vs incomplete rows; modelling uses complete cases only.
cc  = complete.cases(data.ori)
data.notComplete = data.ori[! cc,]
# Restrict to predictors + response + join key.
data = data.ori[cc,] %>% select_at(c(predictors,output.var,'JobName'))
message('Original cases: ',nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ',nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ',nrow(data))
## Complete cases: 6980
# Sanity check of the key and response distributions on the retained rows.
summary(dplyr::select_at(data,c('JobName',output.var)))
##       JobName           y3        
##  Job_00001:   1   Min.   : 95.91  
##  Job_00002:   1   1st Qu.:118.29  
##  Job_00003:   1   Median :124.03  
##  Job_00004:   1   Mean   :125.40  
##  Job_00007:   1   3rd Qu.:131.06  
##  Job_00008:   1   Max.   :193.73  
##  (Other)  :6974

Output Variable

The output variable y3 shows right skewness, so we will proceed with a log transformation.

Histogram

# Density histogram of the raw output variable (long format via gather()).
df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() 

  #stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  

QQPlot

# Normal QQ plot of the raw output variable.
ggplot(gather(select_at(data,output.var)), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Transformation of the output variable from y3 to y3.log

# Apply the log10 transform to the response when requested; otherwise copy it
# unchanged under the transformed name so downstream code is uniform.
if(log.pred==TRUE) data[[output.var.tr]] = log(data[[output.var]],10) else
  data[[output.var.tr]] = data[[output.var]]
# Side-by-side densities of the raw and transformed response.
df=gather(select_at(data,c(output.var,output.var.tr)))
ggplot(df, aes(value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() + 
  # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
  facet_wrap(~key, scales = 'free',ncol=2)

# QQ plots of raw vs transformed response.
ggplot(gather(select_at(data,c(output.var,output.var.tr))), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Best Normalizator y3

Normalization of y3 using the bestNormalize package (which suggests orderNorm). This is interesting, but I think it goes beyond the objective of the project.

# Let bestNormalize pick the best normalizing transform for the raw response.
# Candidates are compared out-of-sample via Pearson P / df (lower = more normal).
t=bestNormalize::bestNormalize(data[[output.var]])
t
## Best Normalizing transformation with 6980 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - No transform: 2.8839 
##  - Box-Cox: 1.3694 
##  - Log_b(x+a): 1.9466 
##  - sqrt(x+a): 2.3519 
##  - exp(x): 749.0131 
##  - arcsinh(x): 1.9466 
##  - Yeo-Johnson: 1.1427 
##  - orderNorm: 1.1189 
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 6980 nonmissing obs and no ties 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
##  95.913 118.289 124.030 131.059 193.726
# QQ plot of the raw response...
qqnorm(data[[output.var]])

# ...and after the chosen orderNorm transform (predict(t) applies it).
qqnorm(predict(t))

orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformation leads to a normal distribution.

Predictors

All predictors show a Fat-Tail situation, where the two tails are very tall, and a low distribution around the mean. The orderNorm transformation can help (see [Best Normalizator] section)

Interesting Predictors

Histograms

if (eda == TRUE){
  # Hand-picked predictors that looked interesting during EDA.
  # (This `cols` is reused by the scatter-plot chunk that follows.)
  cols = c('x11','x18','stat98','x7','stat110')
  df=gather(select_at(data,cols))
  # BUG FIX: only the last expression of a top-level `if` block is
  # auto-printed, so this ggplot was silently discarded in the original;
  # print() it explicitly.
  print(
    ggplot(df, aes(value)) + 
      geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
      geom_density() + 
      # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
      facet_wrap(~key, scales = 'free',ncol=3)
  )
  
  # ggplot(gather(select_at(data,cols)), aes(sample=value)) + 
  #   stat_qq()+
  #   facet_wrap(~key, scales = 'free',ncol=2)
  
  # Last expression of the block: per-column summaries, auto-printed.
  lapply(select_at(data,cols),summary)
}

Scatter plots vs. the output variable y3.log

if (eda == TRUE){
  # Long format: one row per (predictor, value), keeping the response column.
  # NOTE(review): `cols` is defined inside the previous eda chunk — this chunk
  # assumes that one has already run.
  d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light green',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=3)
}

All Predictors

Histograms

All predictors show strong indications of fat tails.

if (eda == TRUE){
  # Density histograms for every predictor, one facet per variable.
  df=gather(select_at(data,predictors))
  ggplot(df, aes(value)) + 
    geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
    geom_density() + 
    # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
    facet_wrap(~key, scales = 'free',ncol=4)
}

Correlations

With Output Variable

if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  # Correlation of every predictor with the (transformed) response, rounded.
  # FIX: the sort column is addressed via the `.data` pronoun instead of the
  # hard-coded `y3.log`, so the chunk works for any `output.var.tr`.
  t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
                            ,select_at(data,output.var.tr)),4))  %>%
    rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>%
    arrange(desc(.data[[output.var.tr]]))
  #DT::datatable(t)
  message("Top Positive")
  # BUG FIX: only the last expression of a top-level `if` block is
  # auto-printed, so the first kable() was silently discarded; print() both.
  print(kable(head(arrange(t,desc(.data[[output.var.tr]])),20)))
  message("Top Negative")
  print(kable(head(arrange(t,.data[[output.var.tr]]),20)))
}

Between All Variables

if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  # Full variable-by-variable correlation matrix, rounded to 4 dp.
  t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
  #DT::datatable(t,options=list(scrollX=T))
  message("Showing only 10 variables")
  kable(t[1:10,1:10])
}

Scatter Plots with Output Variable

Scatter plots with all predictors and the output variable (y3.log)

if (eda == TRUE){
  # Scatter of every predictor against the transformed response.
  d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light blue',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=4)
}

Multicollinearity - VIF

No Multicollinearity among predictors

Showing Top predictor by VIF Value

if (eda == TRUE){
  # Variance-inflation factors, highest first; large VIF => multicollinearity.
  vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
  head(vifDF,15)
}

Feature Eng

  • Square Root transformation for x18
# Feature engineering: add a square-root version of the right-skewed x18.
data.tr=data %>%
  mutate(x18.sqrt = sqrt(x18)) 
# Column pair used by the pre/post comparison plots below.
cols=c('x18','x18.sqrt')

Comparing Pre and Post Transformation Density Plots

# ggplot(gather(select_at(data.tr,cols)), aes(value)) + 
#   geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
#   geom_density() + 
#   facet_wrap(~key, scales = 'free',ncol=4)

# Scatter of x18 and x18.sqrt against the transformed response.
d = gather(dplyr::select_at(data.tr,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
  geom_point(color='light blue',alpha=0.5) + 
  geom_smooth() + 
  facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#removing unwanted variables
# Drop the untransformed x18, the raw response and the join key before modelling.
# NOTE(review): 'y3' is hard-coded here instead of output.var — confirm this
# still works if a different response is chosen via params.
data.tr=data.tr %>%
  dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('x18','y3','JobName')])

# From here on `data` is the engineered dataset and `label.names` the response.
data=data.tr
label.names=output.var.tr

Modeling

PCA

# 0 = no interactions, 1 = full two-way interactions among all predictors,
# 2 = two-way interactions among controlled (x*) variables only, with the
#     observational stat* variables entering additively.
InteractionMode = 2 # 0 for no interaction, 1 for Full 2 way interaction and 2 for Intelligent Interaction 

pca.vars  = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]   # response is excluded from the PCA

if(InteractionMode == 1){
  pca.formula =as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
  #saveRDS(pca.model,'pca.model.rds')
}
if (InteractionMode == 0){
  pca.model =  prcomp(x=data[,pca.vars],center=T,scale.=T,retx = T)
}
if (InteractionMode == 2){
  # Controlled inputs are named "x*", observational statistics "stat*".
  controlled.vars = pca.vars[grep("^x",pca.vars)]
  stat.vars = pca.vars[grep("^stat",pca.vars)]
  
  two.way = paste0('~(',paste0(controlled.vars, collapse ='+'),')^2')
  no.interact = paste0(stat.vars, collapse ='+')
  
  pca.formula = as.formula(paste(two.way, no.interact, sep = "+"))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
}

# Keep the leading PCs whose cumulative variance share stays <= targetCumVar.
targetCumVar = .8

pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var)   # proportion of variance per PC
pca.model$cumpvar = cumsum(pca.model$pvar )           # cumulative proportion
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar    # logical mask of retained PCs
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 138 PCAs justify 80.0% of the total Variance. (79.9%)
# Scree and cumulative-variance plots.
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained",   type='b')

plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')

screeplot(pca.model,npcs = pca.model$pcaSelCount)

screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')

#summary(pca.model)
#pca.model$rotation
#creating dataset
# Modelling dataset: response column + scores of the selected PCs.
data.pca = dplyr::select(data,!!label.names) %>% 
  dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
                                 ,!!colnames(pca.model$rotation)[pca.model$pcaSel])
  )

Train Test Split

# Shuffle, then 80/20 train/test split (sample.split is from caTools).
# NOTE(review): no set.seed() before these sampling calls — the split is not
# reproducible across runs; confirm whether that is intended.
data.pca = data.pca[sample(nrow(data.pca)),] # randomly shuffle data
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)

data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)

Common Functions

# Diagnostic plots for a fitted lm:
#   - base plot(model) panels
#   - studentized residuals vs fitted values
#   - standardized residuals vs fitted values (with +/-2 reference bands)
#   - histogram of studentized residuals against N(0, 1)
#   - leverage and Cook's distance index plots
#
# Args:
#   model: fitted lm object.
#   train: data the model was fitted on (used for predictions; the Cook's
#          distance thresholds scale with nrow(train)).
# Returns: vector of Cook's distances, named by row id.
plot.diagnostics <-  function(model, train) {
  plot(model)
  
  r.standard = rstandard(model)
  r.student = rstudent(model)
  
  # Studentized residuals vs fitted values.
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = 0,size=1)+
    ylab("Studentized Residuals") +
    xlab("Predicted Values")+
    ggtitle("Studentized Residual Plot")
  plot(p)
  
  # Standardized residuals vs fitted values; the +/-2 lines flag outliers.
  # BUG FIX: this plot was labelled "Student Residual Plot" in the original
  # although it shows rstandard(), not rstudent().
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = c(-2,0,2),size=1)+
    ylab("Standardized Residuals") +
    xlab("Predicted Values")+
    ggtitle("Standardized Residual Plot")
  plot(p)
  # Histogram of studentized residuals with a standard-normal overlay.
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
    geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) + 
    stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
    ylab("Density")+
    xlab("Studentized Residuals")+
    ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influence measures (summary is too verbose to print).
  inf.meas = influence.measures(model)
  
  # Leverage (hat values) by observation index.
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    ylab('Leverage - check') + 
    xlab('Index')
  plot(p)
  # Cook's distance by observation index; reference line at the common 4/n
  # threshold. NOTE(review): text labels are drawn only for cd > 15/n (a
  # stricter cut, presumably to limit overplotting) — confirm intentional.
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=T,size=3,vjust=-.5)+
    ylab('Cooks distances') + 
    geom_hline(yintercept = c(4/nrow(train),0),size=1)+
    xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = "")) 
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = "")) 
  return(cd)
}

# function to set up random seeds
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html 
#
# Builds the `seeds` list expected by caret::trainControl so that (parallel)
# resampling is reproducible.
#
# Args:
#   method:  resampling method; "cv" or "repeatedcv" (anything else -> NULL).
#   numbers: number of folds.
#   repeats: number of repeats (used for "repeatedcv" only).
#   tunes:   tuning-grid length, added to the seeds drawn per resample.
#   seed:    master seed used to generate the per-resample seeds.
# Returns: list of length B + 1 (B = number of resamples; the extra element
#          seeds the final model fit), or NULL for unknown methods.
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  # B is the number of resamples; each of the first B elements needs
  # `numbers + tune length` integer seeds.
  B <- if (method == "cv") numbers
  else if(method == "repeatedcv") numbers * repeats
  else NULL
  # BUG FIX: the original tested is.null(length) — `length` is the base
  # function and is never NULL, so this branch was unreachable and
  # vector(mode = "list", length = NULL) errored for unknown methods.
  if(is.null(B)) {
    seeds <- NULL
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                                                  , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}



# Train one of the subset-selection / shrinkage regressions through caret and
# print a standard set of diagnostics.
#
# Supported methods:
#   - 'leapForward' / 'leapBackward' / 'leapSeq' (leaps::regsubsets wrappers)
#   - 'glmnet' with subopt == 'LASSO'
#   - 'lars'
#
# Args:
#   formula:       full model formula; the method selects a subset of terms.
#   data:          training data.frame.
#   method:        caret method name (see above).
#   subopt:        sub-option qualifier; only 'LASSO' (with glmnet) is used.
#   feature.names: predictor names; sizes the nvmax grid for leap methods.
#   train.control: optional caret::trainControl; a seeded 10-fold CV control
#                  is built when NULL.
#   tune.grid:     optional tuning grid; a method-specific default is built
#                  when NULL.
#   pre.proc:      optional preProcess spec; lars defaults to center/scale.
#
# Returns: list(model, id, residPlot, residHistogram, metricsPlot[, modelLM])
#          — a superset of the per-branch fields the original returned, so
#          existing callers keep working. NULL for unknown methods.
train.caret.glmselect = function(formula, data, method
                                 ,subopt = NULL, feature.names
                                 , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  
  is.leap  = method %in% c('leapForward','leapBackward','leapSeq')
  # identical() is NULL-safe: the original `subopt == 'LASSO'` produced a
  # length-zero condition (and an error) when subopt was left NULL.
  is.lasso = method == 'glmnet' && identical(subopt, 'LASSO')
  
  # Helper: MAE/RMSE/Rsquared vs the tuning parameter named `xvar`
  # (deduplicates the three per-branch copies of this plot).
  plot.metrics = function(results, xvar){
    dataPlot = results %>%
      gather(key='metric',value='value',-one_of(xvar)) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    p = ggplot(data=dataPlot,aes_string(x=xvar,y='value')) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(p)
    p
  }
  
  # Helper: residuals-vs-predicted scatter plus residual histogram with a
  # normal-density overlay. Uses model.caret/data from the enclosing scope.
  plot.residuals = function(){
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    # BUG FIX: the original was missing the `+` before theme_light(), so the
    # theme was evaluated as a separate, silently-discarded expression.
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                      , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    list(residPlot = residPlot, residHistogram = residHistogram)
  }
  
  if(is.null(train.control)){
    # Seeded 10-fold CV so parallel training stays reproducible.
    train.control <- trainControl(method = "cv"
                              ,number = 10
                              ,seeds = setCaretSeeds(method = "cv"
                                                     , numbers = 10
                                                     , seed = 1701)
                              ,search = "grid"
                              ,verboseIter = TRUE
                              ,allowParallel = TRUE
                              )
  }
  
  if(is.null(tune.grid)){
    if (is.leap){
      # Try every subset size up to the number of candidate features.
      tune.grid = data.frame(nvmax = seq_along(feature.names))
    }
    if (is.lasso){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)   # alpha = 1 selects pure LASSO within glmnet
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (method == 'lars'){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale") 
    }
  }
  
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  # Use ~75% of the cores, leaving the rest for other tasks. floor() guards
  # against the fractional worker count detectCores()*0.75 can produce.
  cl <- makeCluster(max(1, floor(detectCores()*0.75)))
  registerDoParallel(cl)
  # on.exit guarantees the cluster is released and the sequential backend is
  # restored even if caret::train errors (the original leaked the cluster).
  on.exit({ stopCluster(cl); registerDoSEQ() }, add = TRUE)

  set.seed(1) 
  # The seed has to be set immediately before train() is called; setting it
  # earlier did not ensure reproducibility.
  model.caret <- caret::train(formula
                              , data = data
                              , method = method
                              , tuneGrid = tune.grid
                              , trControl = train.control
                              , preProc = pre.proc
                              )
  
  if (is.leap){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel

    metricsPlot = plot.metrics(model.caret$results, 'nvmax')
    # leaps does not support studentized residuals; plain residuals are used.
    res.plots = plot.residuals()

    id = rownames(model.caret$bestTune)    
    # regsubsets does not return a full model (see its documentation), so we
    # refit the selected terms with lm() to obtain coefficient intervals.
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <-  as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    print(car::Confint(mod))
    return(list(model = model, id = id
                , residPlot = res.plots$residPlot
                , residHistogram = res.plots$residHistogram
                , metricsPlot = metricsPlot
                , modelLM = mod))
  }
  if (is.lasso){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    print(model.caret$results)
    model = model.caret$finalModel

    metricsPlot = plot.metrics(model.caret$results, 'lambda')
    res.plots = plot.residuals()

    print("Coefficients") 
    # No confidence intervals for glmnet:
    # https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t = coef(model, s = model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]   # keep only the non-zero (selected) terms
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret, id = id
                , residPlot = res.plots$residPlot
                , residHistogram = res.plots$residHistogram
                , metricsPlot = metricsPlot))
  }
  if (method == 'lars'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)

    metricsPlot = plot.metrics(model.caret$results, 'fraction')
    res.plots = plot.residuals()

    print("Coefficients") 
    t = coef(model.caret$finalModel, s = model.caret$bestTune$fraction, mode = 'fraction')
    model.coef = t[which(t!=0)]   # keep only the non-zero (selected) terms
    print(model.coef)
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret, id = id
                , residPlot = res.plots$residPlot
                , residHistogram = res.plots$residHistogram
                , metricsPlot = metricsPlot))
  }
  invisible(NULL)   # unknown method/subopt combination: nothing to report
}

# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# changed slightly since call[[2]] was just returning "formula" without actually returning the value in formula
predict.regsubsets <- function(object, newdata, id, formula, ...) {
    # Build the design matrix for the new data (this adds the intercept column
    # and expands any interaction terms), then keep only the columns that the
    # size-`id` submodel actually selected and multiply by its coefficients.
    design <- model.matrix(formula, newdata)
    betas <- coef(object, id = id)
    design[, names(betas)] %*% betas
}
  
# Predict on a held-out test set, report the test MSE and draw
# actual-vs-predicted plots — first on the transformed modelling scale (when
# the response was transformed), then back-transformed to the original scale.
#
# Relies on the globals log.pred / norm.pred set at the top of the script.
#
# Args:
#   model:  fitted model (lm, regsubsets final model, glmnet, or caret lars).
#   test:   test data.frame.
#   level:  confidence level for the plain-lm prediction path.
#   good/ok: relative-error bands (green/red lines) on the final plot.
#   method/subopt: select the prediction path (NULL -> plain lm predict).
#   id/formula: needed for the regsubsets path (see predict.regsubsets).
#   feature.names: needed for the glmnet path (matrix input).
#   label.names: response column name in `test`.
#   transformation: bestNormalize object, used when norm.pred is TRUE.
test.model = function(model, test, level=0.95
                      ,draw.limits = FALSE, good = 0.1, ok = 0.15
                      ,method = NULL, subopt = NULL
                      ,id = NULL, formula, feature.names, label.names
                      ,transformation = NULL){
  ## if using caret for glm select equivalent functionality, 
  ## need to pass formula (full is ok as it will select subset of variables from there)
  # BUG FIX: the original used independent `if`s, so a NULL `method` crashed
  # on `method == 'leapForward'` (length-zero condition). Chain with else-if.
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level) 
  } else if (method %in% c('leapForward','leapBackward','leapSeq')){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  } else if (method == 'glmnet' && identical(subopt, 'LASSO')){
    xtest = as.matrix(test[,feature.names]) 
    pred=as.data.frame(predict(model, xtest))
  } else if (method == 'lars'){
    pred=as.data.frame(predict(model, newdata = test))
  } else {
    stop("test.model: unsupported method '", method, "'", call. = FALSE)
  }
    
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))

  # MSE on the (possibly transformed) modelling scale.
  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  
  if(log.pred == TRUE || norm.pred == TRUE){
    # Actual vs predicted on the transformed scale.
    # BUG FIX: inside a function a ggplot object must be print()ed
    # explicitly; the original built this plot and silently discarded it.
    df=data.frame(x=test[,label.names],y=pred[,1])
    print(
      ggplot(df,aes(x=x,y=y)) +
        geom_point(color='blue',alpha=0.5,shape=20,size=2) +
        geom_abline(slope=1,intercept=0,color='black',size=1) +
        #scale_y_continuous(limits=c(min(df),max(df)))+
        xlab("Actual (Transformed)")+
        ylab("Predicted (Transformed)")
    )
  }
    
  # Back-transform actuals and predictions to the original response scale.
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    x = 10^test[,label.names]
    y = 10^pred[,1]  
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }

  # Final actual-vs-predicted plot; green lines mark the +/-`good` relative
  # error band, red lines the +/-`ok` band. Returned (and auto-printed when
  # called at top level) as the function's value.
  df=data.frame(x,y)
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
                ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    #scale_y_continuous(limits=c(min(df),max(df)))+
    xlab("Actual")+
    ylab("Predicted") 
}

Setup Formulae

# Build "<response> ~ <all PC columns>" from the training-set column names.
n <- names(data.train)
 formula <- as.formula(paste(paste(n[n %in% label.names], collapse = " + ")
                             ," ~", paste(n[!n %in% label.names], collapse = " + "))) 

# Intercept-only baseline formula for the same response.
grand.mean.formula = as.formula(paste(paste(n[n %in% label.names], collapse = " + ")," ~ 1"))

print(formula)
## y3.log ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + 
##     PC10 + PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 + 
##     PC19 + PC20 + PC21 + PC22 + PC23 + PC24 + PC25 + PC26 + PC27 + 
##     PC28 + PC29 + PC30 + PC31 + PC32 + PC33 + PC34 + PC35 + PC36 + 
##     PC37 + PC38 + PC39 + PC40 + PC41 + PC42 + PC43 + PC44 + PC45 + 
##     PC46 + PC47 + PC48 + PC49 + PC50 + PC51 + PC52 + PC53 + PC54 + 
##     PC55 + PC56 + PC57 + PC58 + PC59 + PC60 + PC61 + PC62 + PC63 + 
##     PC64 + PC65 + PC66 + PC67 + PC68 + PC69 + PC70 + PC71 + PC72 + 
##     PC73 + PC74 + PC75 + PC76 + PC77 + PC78 + PC79 + PC80 + PC81 + 
##     PC82 + PC83 + PC84 + PC85 + PC86 + PC87 + PC88 + PC89 + PC90 + 
##     PC91 + PC92 + PC93 + PC94 + PC95 + PC96 + PC97 + PC98 + PC99 + 
##     PC100 + PC101 + PC102 + PC103 + PC104 + PC105 + PC106 + PC107 + 
##     PC108 + PC109 + PC110 + PC111 + PC112 + PC113 + PC114 + PC115 + 
##     PC116 + PC117 + PC118 + PC119 + PC120 + PC121 + PC122 + PC123 + 
##     PC124 + PC125 + PC126 + PC127 + PC128 + PC129 + PC130 + PC131 + 
##     PC132 + PC133 + PC134 + PC135 + PC136 + PC137 + PC138
print(grand.mean.formula)
## y3.log ~ 1
# Update feature.names because we may have transformed some features
# (feature.names is later used to size tuning grids and build glmnet input).
feature.names = n[!n %in% label.names]

Full Model

# Baseline: OLS on all retained principal components.
model.full = lm(formula , data.train)
summary(model.full)
## 
## Call:
## lm(formula = formula, data = data.train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.090531 -0.022188 -0.005378  0.017039  0.193080 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.097e+00  4.297e-04 4879.440  < 2e-16 ***
## PC1         -9.765e-04  9.180e-05  -10.637  < 2e-16 ***
## PC2          1.204e-03  1.013e-04   11.890  < 2e-16 ***
## PC3         -2.780e-04  1.116e-04   -2.491 0.012781 *  
## PC4          1.683e-04  1.127e-04    1.493 0.135525    
## PC5          6.824e-04  1.147e-04    5.947 2.91e-09 ***
## PC6          7.099e-05  1.156e-04    0.614 0.539006    
## PC7         -5.115e-04  1.173e-04   -4.362 1.31e-05 ***
## PC8         -2.066e-04  1.169e-04   -1.768 0.077126 .  
## PC9         -2.320e-04  1.222e-04   -1.899 0.057653 .  
## PC10         2.618e-04  1.241e-04    2.110 0.034880 *  
## PC11        -1.364e-03  1.267e-04  -10.763  < 2e-16 ***
## PC12         3.532e-04  1.265e-04    2.792 0.005255 ** 
## PC13         4.169e-04  1.308e-04    3.188 0.001441 ** 
## PC14         1.499e-03  1.299e-04   11.536  < 2e-16 ***
## PC15        -3.569e-04  1.332e-04   -2.679 0.007406 ** 
## PC16         7.894e-04  1.362e-04    5.798 7.10e-09 ***
## PC17        -1.726e-04  1.406e-04   -1.228 0.219558    
## PC18         4.057e-04  1.444e-04    2.809 0.004987 ** 
## PC19        -3.291e-04  1.485e-04   -2.216 0.026752 *  
## PC20         9.082e-04  1.557e-04    5.833 5.77e-09 ***
## PC21        -1.022e-03  1.593e-04   -6.416 1.52e-10 ***
## PC22         4.011e-03  1.661e-04   24.151  < 2e-16 ***
## PC23        -5.266e-04  3.335e-04   -1.579 0.114371    
## PC24        -1.453e-03  3.711e-04   -3.915 9.14e-05 ***
## PC25         9.771e-04  3.678e-04    2.656 0.007922 ** 
## PC26        -4.768e-04  3.705e-04   -1.287 0.198170    
## PC27        -3.659e-04  3.725e-04   -0.982 0.325991    
## PC28         7.895e-04  3.738e-04    2.112 0.034723 *  
## PC29        -7.187e-04  3.739e-04   -1.922 0.054654 .  
## PC30         6.490e-04  3.742e-04    1.735 0.082856 .  
## PC31        -3.553e-04  3.760e-04   -0.945 0.344797    
## PC32         1.333e-03  3.736e-04    3.567 0.000364 ***
## PC33         1.430e-04  3.742e-04    0.382 0.702302    
## PC34        -4.754e-05  3.761e-04   -0.126 0.899408    
## PC35        -6.553e-05  3.784e-04   -0.173 0.862527    
## PC36         5.842e-04  3.785e-04    1.544 0.122763    
## PC37        -3.842e-04  3.780e-04   -1.016 0.309441    
## PC38         4.310e-04  3.826e-04    1.126 0.260035    
## PC39         3.447e-04  3.839e-04    0.898 0.369208    
## PC40         3.159e-04  3.805e-04    0.830 0.406437    
## PC41        -6.995e-04  3.789e-04   -1.846 0.064913 .  
## PC42        -5.578e-04  3.819e-04   -1.460 0.144214    
## PC43         7.032e-04  3.842e-04    1.830 0.067271 .  
## PC44        -5.113e-04  3.821e-04   -1.338 0.180863    
## PC45         1.228e-03  3.828e-04    3.208 0.001346 ** 
## PC46         1.505e-03  3.846e-04    3.913 9.25e-05 ***
## PC47         5.523e-04  3.863e-04    1.430 0.152856    
## PC48         5.154e-05  3.868e-04    0.133 0.894008    
## PC49         1.915e-04  3.867e-04    0.495 0.620475    
## PC50        -8.834e-04  3.867e-04   -2.285 0.022373 *  
## PC51         5.447e-04  3.829e-04    1.422 0.154941    
## PC52         4.093e-04  3.853e-04    1.062 0.288210    
## PC53        -5.554e-04  3.898e-04   -1.425 0.154263    
## PC54        -4.988e-05  3.909e-04   -0.128 0.898461    
## PC55         1.690e-04  3.884e-04    0.435 0.663498    
## PC56         1.699e-05  3.936e-04    0.043 0.965566    
## PC57         4.239e-04  3.897e-04    1.088 0.276689    
## PC58        -5.486e-04  3.915e-04   -1.401 0.161161    
## PC59         1.273e-03  3.931e-04    3.239 0.001207 ** 
## PC60        -8.035e-04  3.945e-04   -2.037 0.041702 *  
## PC61        -2.923e-04  3.980e-04   -0.734 0.462741    
## PC62        -5.600e-04  3.928e-04   -1.426 0.154017    
## PC63         6.699e-04  3.960e-04    1.692 0.090753 .  
## PC64         1.974e-04  3.969e-04    0.497 0.619008    
## PC65         8.933e-04  3.979e-04    2.245 0.024808 *  
## PC66         1.768e-04  3.980e-04    0.444 0.656863    
## PC67         4.047e-04  3.984e-04    1.016 0.309763    
## PC68         8.856e-04  3.979e-04    2.226 0.026072 *  
## PC69        -1.249e-03  3.988e-04   -3.132 0.001744 ** 
## PC70         8.471e-05  4.004e-04    0.212 0.832448    
## PC71        -8.288e-04  3.997e-04   -2.073 0.038177 *  
## PC72         8.328e-04  3.984e-04    2.090 0.036640 *  
## PC73         5.934e-04  3.983e-04    1.490 0.136278    
## PC74        -1.005e-04  4.041e-04   -0.249 0.803594    
## PC75         2.561e-04  4.045e-04    0.633 0.526635    
## PC76         1.605e-03  4.004e-04    4.008 6.22e-05 ***
## PC77        -3.733e-05  4.023e-04   -0.093 0.926070    
## PC78        -3.865e-04  4.041e-04   -0.956 0.338911    
## PC79         8.152e-04  4.023e-04    2.027 0.042759 *  
## PC80        -3.806e-04  4.033e-04   -0.944 0.345438    
## PC81        -5.816e-04  4.070e-04   -1.429 0.153099    
## PC82        -1.121e-03  4.035e-04   -2.779 0.005469 ** 
## PC83         1.166e-03  4.040e-04    2.886 0.003922 ** 
## PC84         2.597e-05  4.045e-04    0.064 0.948808    
## PC85         4.157e-04  4.065e-04    1.022 0.306608    
## PC86         5.556e-04  4.107e-04    1.353 0.176125    
## PC87         5.798e-04  4.082e-04    1.420 0.155570    
## PC88         4.339e-04  4.089e-04    1.061 0.288674    
## PC89         2.791e-04  4.059e-04    0.688 0.491629    
## PC90        -2.377e-03  4.085e-04   -5.818 6.30e-09 ***
## PC91         4.025e-04  4.086e-04    0.985 0.324562    
## PC92         2.796e-04  4.108e-04    0.681 0.496073    
## PC93        -1.612e-04  4.119e-04   -0.391 0.695572    
## PC94         5.326e-04  4.118e-04    1.293 0.195976    
## PC95         2.456e-04  4.115e-04    0.597 0.550627    
## PC96        -8.829e-04  4.138e-04   -2.134 0.032899 *  
## PC97        -2.984e-04  4.133e-04   -0.722 0.470300    
## PC98         5.901e-04  4.157e-04    1.420 0.155761    
## PC99        -1.017e-05  4.139e-04   -0.025 0.980409    
## PC100       -5.491e-05  4.174e-04   -0.132 0.895335    
## PC101       -9.925e-04  4.134e-04   -2.401 0.016389 *  
## PC102        1.445e-03  4.172e-04    3.464 0.000536 ***
## PC103        1.656e-04  4.177e-04    0.397 0.691745    
## PC104        6.401e-04  4.169e-04    1.536 0.124718    
## PC105        2.704e-04  4.187e-04    0.646 0.518401    
## PC106        9.546e-04  4.192e-04    2.277 0.022802 *  
## PC107        4.008e-04  4.181e-04    0.959 0.337768    
## PC108       -1.211e-03  4.190e-04   -2.891 0.003861 ** 
## PC109       -3.250e-04  4.196e-04   -0.775 0.438615    
## PC110       -4.030e-04  4.192e-04   -0.961 0.336434    
## PC111       -8.087e-04  4.201e-04   -1.925 0.054267 .  
## PC112        9.124e-04  4.205e-04    2.170 0.030038 *  
## PC113       -9.447e-06  4.207e-04   -0.022 0.982085    
## PC114       -3.020e-04  4.224e-04   -0.715 0.474698    
## PC115       -6.572e-04  4.211e-04   -1.561 0.118609    
## PC116       -6.015e-05  4.214e-04   -0.143 0.886486    
## PC117       -1.682e-03  4.229e-04   -3.977 7.06e-05 ***
## PC118       -9.490e-04  4.240e-04   -2.238 0.025235 *  
## PC119       -8.211e-04  4.273e-04   -1.922 0.054703 .  
## PC120       -5.498e-04  4.255e-04   -1.292 0.196351    
## PC121        4.036e-04  4.281e-04    0.943 0.345750    
## PC122       -7.965e-05  4.310e-04   -0.185 0.853371    
## PC123       -7.135e-04  4.282e-04   -1.666 0.095717 .  
## PC124       -1.246e-04  4.270e-04   -0.292 0.770428    
## PC125       -4.637e-05  4.282e-04   -0.108 0.913776    
## PC126        1.035e-03  4.286e-04    2.415 0.015790 *  
## PC127        1.069e-03  4.250e-04    2.515 0.011920 *  
## PC128       -4.753e-04  4.318e-04   -1.101 0.271047    
## PC129       -7.344e-04  4.301e-04   -1.708 0.087762 .  
## PC130       -1.290e-04  4.319e-04   -0.299 0.765159    
## PC131        1.157e-03  4.315e-04    2.681 0.007360 ** 
## PC132        1.509e-03  4.297e-04    3.513 0.000447 ***
## PC133       -4.910e-04  4.328e-04   -1.135 0.256580    
## PC134       -2.839e-04  4.324e-04   -0.657 0.511472    
## PC135       -1.602e-05  4.320e-04   -0.037 0.970424    
## PC136        6.361e-04  4.353e-04    1.461 0.144009    
## PC137        8.859e-05  4.332e-04    0.204 0.837979    
## PC138        1.132e-03  4.376e-04    2.587 0.009709 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03202 on 5445 degrees of freedom
## Multiple R-squared:  0.2367, Adjusted R-squared:  0.2173 
## F-statistic: 12.23 on 138 and 5445 DF,  p-value: < 2.2e-16
# Full diagnostic suite for the baseline model; returns Cook's distances.
cd.full = plot.diagnostics(model=model.full, train=data.train)

## [1] "Number of data points that have Cook's D > 4/n: 256"
## [1] "Number of data points that have Cook's D > 1: 0"

Checking with removal of high influence points

# Rows whose Cook's distance exceeds the common 4/n influence threshold.
high.cd = names(cd.full[cd.full > 4/nrow(data.train)])

#save dataset with high.cd flagged
t = data.train %>% 
  rownames_to_column() %>%
  mutate(high.cd = ifelse(rowname %in% high.cd,1,0))
#write.csv(t,file='data_high_cd_flag.csv',row.names = F)
###
# Refit without the high-influence rows to check coefficient stability.
data.train2 = data.train[!(rownames(data.train)) %in% high.cd,]
model.full2 = lm(formula , data.train2)
summary(model.full2)
## 
## Call:
## lm(formula = formula, data = data.train2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.06020 -0.01906 -0.00325  0.01668  0.07266 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.093e+00  3.585e-04 5837.593  < 2e-16 ***
## PC1         -9.218e-04  7.658e-05  -12.038  < 2e-16 ***
## PC2          1.285e-03  8.456e-05   15.192  < 2e-16 ***
## PC3         -3.777e-04  9.308e-05   -4.057 5.04e-05 ***
## PC4          1.297e-04  9.388e-05    1.382 0.167007    
## PC5          5.773e-04  9.574e-05    6.030 1.75e-09 ***
## PC6          9.269e-05  9.637e-05    0.962 0.336182    
## PC7         -5.744e-04  9.761e-05   -5.884 4.25e-09 ***
## PC8         -1.877e-04  9.820e-05   -1.911 0.056036 .  
## PC9         -2.546e-04  1.016e-04   -2.507 0.012196 *  
## PC10         7.232e-05  1.035e-04    0.699 0.484852    
## PC11        -1.438e-03  1.054e-04  -13.649  < 2e-16 ***
## PC12         4.231e-04  1.057e-04    4.004 6.32e-05 ***
## PC13         3.137e-04  1.093e-04    2.871 0.004110 ** 
## PC14         1.639e-03  1.084e-04   15.117  < 2e-16 ***
## PC15        -3.042e-04  1.114e-04   -2.731 0.006333 ** 
## PC16         8.430e-04  1.136e-04    7.418 1.38e-13 ***
## PC17        -1.613e-04  1.173e-04   -1.375 0.169075    
## PC18         4.671e-04  1.208e-04    3.868 0.000111 ***
## PC19        -3.364e-04  1.239e-04   -2.715 0.006647 ** 
## PC20         9.398e-04  1.301e-04    7.221 5.91e-13 ***
## PC21        -8.681e-04  1.325e-04   -6.551 6.27e-11 ***
## PC22         4.004e-03  1.383e-04   28.948  < 2e-16 ***
## PC23        -6.460e-04  2.793e-04   -2.313 0.020745 *  
## PC24        -1.379e-03  3.097e-04   -4.452 8.68e-06 ***
## PC25         1.017e-03  3.063e-04    3.322 0.000901 ***
## PC26        -7.350e-04  3.092e-04   -2.377 0.017497 *  
## PC27        -2.869e-04  3.108e-04   -0.923 0.355918    
## PC28         7.403e-04  3.117e-04    2.375 0.017572 *  
## PC29        -5.809e-04  3.121e-04   -1.861 0.062745 .  
## PC30         6.229e-04  3.124e-04    1.994 0.046190 *  
## PC31         7.194e-05  3.143e-04    0.229 0.818955    
## PC32         1.127e-03  3.115e-04    3.619 0.000299 ***
## PC33         1.899e-04  3.118e-04    0.609 0.542483    
## PC34        -3.772e-04  3.137e-04   -1.202 0.229333    
## PC35         2.116e-05  3.155e-04    0.067 0.946539    
## PC36         5.659e-04  3.160e-04    1.791 0.073350 .  
## PC37        -5.222e-04  3.150e-04   -1.658 0.097405 .  
## PC38         2.701e-04  3.195e-04    0.845 0.397889    
## PC39         2.622e-04  3.213e-04    0.816 0.414477    
## PC40         2.346e-04  3.176e-04    0.739 0.460029    
## PC41        -6.475e-04  3.162e-04   -2.048 0.040620 *  
## PC42        -6.866e-04  3.196e-04   -2.148 0.031733 *  
## PC43         6.060e-04  3.203e-04    1.892 0.058569 .  
## PC44        -3.149e-04  3.191e-04   -0.987 0.323855    
## PC45         1.319e-03  3.205e-04    4.117 3.90e-05 ***
## PC46         1.261e-03  3.210e-04    3.930 8.62e-05 ***
## PC47         1.995e-04  3.224e-04    0.619 0.536179    
## PC48        -2.922e-04  3.241e-04   -0.902 0.367273    
## PC49        -1.730e-04  3.229e-04   -0.536 0.592140    
## PC50        -3.697e-04  3.225e-04   -1.146 0.251685    
## PC51         4.581e-04  3.193e-04    1.435 0.151429    
## PC52         6.001e-04  3.215e-04    1.866 0.062062 .  
## PC53        -3.911e-04  3.249e-04   -1.204 0.228655    
## PC54        -7.146e-06  3.260e-04   -0.022 0.982513    
## PC55         1.482e-04  3.251e-04    0.456 0.648594    
## PC56         1.084e-04  3.280e-04    0.331 0.740971    
## PC57         1.750e-04  3.253e-04    0.538 0.590750    
## PC58        -6.214e-04  3.267e-04   -1.902 0.057184 .  
## PC59         8.635e-04  3.263e-04    2.646 0.008168 ** 
## PC60        -5.853e-04  3.291e-04   -1.779 0.075351 .  
## PC61        -3.302e-04  3.319e-04   -0.995 0.319833    
## PC62        -4.346e-04  3.278e-04   -1.326 0.184983    
## PC63         7.022e-04  3.302e-04    2.126 0.033532 *  
## PC64        -5.134e-05  3.306e-04   -0.155 0.876608    
## PC65         8.445e-04  3.320e-04    2.544 0.010991 *  
## PC66         1.845e-04  3.321e-04    0.556 0.578545    
## PC67         2.889e-04  3.335e-04    0.866 0.386424    
## PC68         1.116e-03  3.327e-04    3.354 0.000802 ***
## PC69        -1.129e-03  3.332e-04   -3.389 0.000707 ***
## PC70        -3.221e-04  3.331e-04   -0.967 0.333569    
## PC71        -1.070e-03  3.338e-04   -3.206 0.001352 ** 
## PC72         6.772e-04  3.327e-04    2.035 0.041866 *  
## PC73         5.754e-04  3.320e-04    1.733 0.083079 .  
## PC74         2.526e-04  3.375e-04    0.748 0.454258    
## PC75        -6.085e-05  3.365e-04   -0.181 0.856514    
## PC76         1.592e-03  3.336e-04    4.773 1.87e-06 ***
## PC77         2.632e-05  3.355e-04    0.078 0.937491    
## PC78        -3.115e-04  3.359e-04   -0.927 0.353916    
## PC79         7.268e-04  3.371e-04    2.156 0.031115 *  
## PC80        -2.630e-04  3.369e-04   -0.781 0.435045    
## PC81        -6.244e-04  3.388e-04   -1.843 0.065388 .  
## PC82        -8.437e-04  3.379e-04   -2.497 0.012571 *  
## PC83         1.320e-03  3.375e-04    3.910 9.34e-05 ***
## PC84        -2.965e-05  3.381e-04   -0.088 0.930121    
## PC85         4.614e-04  3.402e-04    1.356 0.175074    
## PC86         1.777e-04  3.435e-04    0.517 0.604940    
## PC87         5.984e-04  3.409e-04    1.755 0.079282 .  
## PC88         1.529e-04  3.402e-04    0.449 0.653181    
## PC89         2.414e-05  3.398e-04    0.071 0.943356    
## PC90        -2.226e-03  3.413e-04   -6.523 7.54e-11 ***
## PC91         5.740e-04  3.428e-04    1.675 0.094078 .  
## PC92         2.810e-04  3.432e-04    0.819 0.412995    
## PC93         1.128e-05  3.427e-04    0.033 0.973746    
## PC94         5.706e-04  3.441e-04    1.658 0.097330 .  
## PC95         3.536e-04  3.435e-04    1.029 0.303312    
## PC96        -6.900e-04  3.446e-04   -2.002 0.045339 *  
## PC97        -2.183e-04  3.460e-04   -0.631 0.528103    
## PC98         4.201e-04  3.474e-04    1.209 0.226596    
## PC99        -4.002e-05  3.460e-04   -0.116 0.907935    
## PC100       -2.645e-04  3.483e-04   -0.759 0.447644    
## PC101       -8.152e-04  3.457e-04   -2.358 0.018418 *  
## PC102        1.576e-03  3.468e-04    4.543 5.66e-06 ***
## PC103        2.746e-06  3.489e-04    0.008 0.993721    
## PC104        5.770e-04  3.478e-04    1.659 0.097209 .  
## PC105        5.902e-06  3.488e-04    0.017 0.986501    
## PC106        1.003e-03  3.504e-04    2.862 0.004221 ** 
## PC107        3.966e-04  3.496e-04    1.134 0.256668    
## PC108       -7.518e-04  3.490e-04   -2.155 0.031245 *  
## PC109       -2.466e-04  3.493e-04   -0.706 0.480375    
## PC110       -2.181e-04  3.494e-04   -0.624 0.532562    
## PC111       -7.398e-04  3.501e-04   -2.113 0.034628 *  
## PC112        7.066e-04  3.506e-04    2.015 0.043916 *  
## PC113        1.701e-04  3.517e-04    0.484 0.628745    
## PC114       -2.693e-04  3.524e-04   -0.764 0.444826    
## PC115       -7.366e-04  3.519e-04   -2.093 0.036396 *  
## PC116       -9.007e-06  3.509e-04   -0.026 0.979522    
## PC117       -1.580e-03  3.536e-04   -4.468 8.07e-06 ***
## PC118       -9.378e-04  3.552e-04   -2.640 0.008314 ** 
## PC119       -8.418e-04  3.570e-04   -2.358 0.018424 *  
## PC120       -6.197e-04  3.549e-04   -1.746 0.080862 .  
## PC121        5.632e-04  3.575e-04    1.575 0.115248    
## PC122       -2.975e-04  3.603e-04   -0.826 0.408912    
## PC123       -2.495e-04  3.571e-04   -0.699 0.484670    
## PC124       -3.537e-04  3.567e-04   -0.992 0.321316    
## PC125        2.699e-04  3.583e-04    0.753 0.451198    
## PC126        8.272e-04  3.568e-04    2.318 0.020478 *  
## PC127        7.749e-04  3.551e-04    2.182 0.029155 *  
## PC128       -3.578e-04  3.611e-04   -0.991 0.321817    
## PC129       -8.926e-04  3.592e-04   -2.485 0.012987 *  
## PC130       -1.232e-05  3.598e-04   -0.034 0.972678    
## PC131        9.827e-04  3.608e-04    2.724 0.006477 ** 
## PC132        1.560e-03  3.583e-04    4.355 1.36e-05 ***
## PC133       -8.612e-04  3.614e-04   -2.383 0.017211 *  
## PC134       -2.713e-04  3.610e-04   -0.752 0.452368    
## PC135       -9.305e-05  3.598e-04   -0.259 0.795909    
## PC136        5.593e-04  3.640e-04    1.536 0.124519    
## PC137        2.179e-04  3.615e-04    0.603 0.546731    
## PC138        8.440e-04  3.647e-04    2.314 0.020689 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02607 on 5189 degrees of freedom
## Multiple R-squared:  0.3188, Adjusted R-squared:  0.3007 
## F-statistic:  17.6 on 138 and 5189 DF,  p-value: < 2.2e-16
# Diagnostics for the refit model, excluding the high-influence rows.
cd.full2 = plot.diagnostics(model = model.full2, train = data.train2)

## [1] "Number of data points that have Cook's D > 4/n: 251"
## [1] "Number of data points that have Cook's D > 1: 0"
# Residuals are much more normal than before.
# Check whether the high-influence and normal rows differ in distribution,
# and if so, on which variables.

# High-leverage plot: target-variable distribution by influence group.
plotData = data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, target = one_of(label.names))

ggplot(plotData, aes(x = type, y = target)) +
  geom_boxplot(fill = 'light blue', outlier.shape = NA) +
  scale_y_continuous(name = "Target Variable Values",
                     label = scales::comma_format(accuracy = .1)) +
  theme_light() +
  ggtitle('Distribution of High Leverage Points and Normal  Points')

# Two-sample t-tests: compare High vs. Normal rows on every feature.
plotData = data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, one_of(feature.names))

# Equal-variance two-sample t-test of each feature against the split.
comp.test = lapply(dplyr::select(plotData, one_of(feature.names))
                   , function(x) t.test(x ~ plotData$type, var.equal = TRUE))

# Keep only the features whose group means differ at the 5% level.
sig.comp = list.filter(comp.test, p.value < 0.05)
# vapply (not sapply): guarantees a named numeric vector, and returns
# numeric(0) rather than list() when no feature is significant.
vapply(sig.comp, function(x) x[['p.value']], numeric(1))
##         PC2        PC10        PC21        PC34        PC49        PC70        PC75       PC108 
## 0.037931076 0.007022022 0.015442896 0.047745857 0.020470681 0.033985809 0.039226247 0.010123371
# Box plots restricted to the features with significant t-tests.
mm = filter(melt(plotData, id = c('type')), variable %in% names(sig.comp))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 5, scales = 'free_y') +
  scale_y_continuous(name = "values", label = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

# Distribution (box) plots for every feature.
mm = melt(plotData, id = c('type'))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 8, scales = 'free_y') +
  scale_y_continuous(name = "values", label = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

Grand Means Model

# Intercept-only (grand mean) baseline model for later comparison.
model.null = lm(formula = grand.mean.formula, data = data.train)
summary(model.null)
## 
## Call:
## lm(formula = grand.mean.formula, data = data.train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11443 -0.02392 -0.00343  0.02090  0.19088 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.0963035  0.0004843    4329   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03619 on 5583 degrees of freedom

Variable Selection

Basic: http://www.stat.columbia.edu/~martin/W2024/R10.pdf Cross Validation + Other Metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/

Forward Selection with CV

Train

# Forward selection (leaps via caret), tuned by cross-validation.
# isTRUE() is safer than `== TRUE`: it yields FALSE (instead of an error
# inside `if`) when the flag is NA or not a length-1 logical.
if (isTRUE(algo.forward.caret)) {
  set.seed(1)  # reproducible CV folds
  returned = train.caret.glmselect(formula = formula
                                   , data = data.train
                                   , method = "leapForward"
                                   , feature.names = feature.names)
  model.forward = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.03467963 0.08228409 0.02689668 0.0005837199 0.01755905 0.0002999811
## 2       2 0.03442479 0.09584815 0.02672288 0.0006696668 0.01801778 0.0003448861
## 3       3 0.03415987 0.11016571 0.02652062 0.0006149829 0.02064252 0.0004941685
## 4       4 0.03374386 0.13134494 0.02610610 0.0006481791 0.01901965 0.0005078870
## 5       5 0.03335768 0.15117284 0.02583748 0.0006824675 0.02092203 0.0005513551
## 6       6 0.03333923 0.15191631 0.02584422 0.0006931797 0.01936718 0.0005251475
## 7       7 0.03327598 0.15511291 0.02581860 0.0006641517 0.01899973 0.0005259697
## 8       8 0.03317918 0.15995957 0.02575309 0.0006320762 0.01732750 0.0005018923
## 9       9 0.03307505 0.16508344 0.02567850 0.0006160087 0.01698117 0.0004669829
## 10     10 0.03289696 0.17392287 0.02551290 0.0005889235 0.01709557 0.0004697173
## 11     11 0.03291761 0.17299007 0.02548476 0.0006006470 0.01761570 0.0004844542
## 12     12 0.03291500 0.17309136 0.02547065 0.0006001467 0.01673993 0.0004830773
## 13     13 0.03289015 0.17434453 0.02546783 0.0006426586 0.01637780 0.0004957932
## 14     14 0.03289115 0.17437202 0.02545558 0.0007018654 0.01725684 0.0005217945
## 15     15 0.03284130 0.17692317 0.02541656 0.0006895316 0.01819374 0.0005112121
## 16     16 0.03285292 0.17633756 0.02542029 0.0006849775 0.01755231 0.0004742705
## 17     17 0.03288050 0.17502441 0.02542037 0.0007001761 0.01707427 0.0005108211
## 18     18 0.03284653 0.17673743 0.02538417 0.0006608252 0.01623923 0.0004841676
## 19     19 0.03281035 0.17853299 0.02536574 0.0006804638 0.01691988 0.0004861180
## 20     20 0.03282852 0.17774842 0.02536737 0.0006985725 0.01802704 0.0005441927
## 21     21 0.03282387 0.17807758 0.02536963 0.0007020519 0.01899034 0.0005247229
## 22     22 0.03281721 0.17835409 0.02535781 0.0007478792 0.01765618 0.0005565969
## 23     23 0.03279416 0.17950156 0.02533902 0.0007710354 0.01681905 0.0005626326
## 24     24 0.03275086 0.18160721 0.02530313 0.0007801356 0.01748304 0.0005713978
## 25     25 0.03274391 0.18207257 0.02528357 0.0007599734 0.01765686 0.0005473072
## 26     26 0.03275078 0.18181347 0.02528047 0.0007808991 0.01768458 0.0005607028
## 27     27 0.03275617 0.18160642 0.02531297 0.0007525466 0.01736412 0.0005702312
## 28     28 0.03274722 0.18212918 0.02531023 0.0007550846 0.01780786 0.0005603116
## 29     29 0.03274248 0.18242858 0.02530561 0.0007520921 0.01870840 0.0005649725
## 30     30 0.03276122 0.18170121 0.02531808 0.0007626415 0.02145986 0.0005924937
## 31     31 0.03276700 0.18149081 0.02530708 0.0007718902 0.02159776 0.0006159969
## 32     32 0.03275929 0.18192517 0.02529951 0.0007547615 0.02066122 0.0005950083
## 33     33 0.03275642 0.18208743 0.02529335 0.0007726665 0.02072365 0.0006097886
## 34     34 0.03276562 0.18172571 0.02528562 0.0007884826 0.01989804 0.0006231518
## 35     35 0.03275643 0.18219436 0.02527723 0.0007783770 0.01955388 0.0006217077
## 36     36 0.03275107 0.18255496 0.02528243 0.0007792909 0.01990465 0.0006096184
## 37     37 0.03273280 0.18344212 0.02527515 0.0007851491 0.02020206 0.0006189663
## 38     38 0.03272513 0.18377459 0.02528503 0.0007688132 0.01902732 0.0006127174
## 39     39 0.03270911 0.18462279 0.02527873 0.0007776991 0.01969676 0.0006010137
## 40     40 0.03274881 0.18275508 0.02531486 0.0007545026 0.01847049 0.0005809108
## 41     41 0.03274136 0.18319268 0.02531524 0.0007721779 0.01872370 0.0006023744
## 42     42 0.03274341 0.18318399 0.02532494 0.0007794003 0.01915266 0.0005981676
## 43     43 0.03272982 0.18384108 0.02531891 0.0007843113 0.02002845 0.0006119334
## 44     44 0.03272383 0.18412692 0.02532393 0.0007867139 0.02025409 0.0006176795
## 45     45 0.03270472 0.18506035 0.02529846 0.0008070006 0.02058011 0.0006212793
## 46     46 0.03269735 0.18543245 0.02528858 0.0007783044 0.01943826 0.0006055289
## 47     47 0.03268474 0.18616665 0.02528938 0.0007917638 0.02134224 0.0006278191
## 48     48 0.03265688 0.18752855 0.02527067 0.0007896322 0.02207858 0.0006340255
## 49     49 0.03265344 0.18772106 0.02525446 0.0007813617 0.02193511 0.0006306541
## 50     50 0.03264060 0.18839356 0.02523165 0.0007581522 0.02210280 0.0006171721
## 51     51 0.03264318 0.18831336 0.02523487 0.0007545332 0.02231320 0.0006054414
## 52     52 0.03263129 0.18889752 0.02522516 0.0007429341 0.02244839 0.0005995996
## 53     53 0.03261722 0.18954526 0.02521700 0.0007572466 0.02261241 0.0006094490
## 54     54 0.03260406 0.19021104 0.02520602 0.0007752339 0.02304311 0.0006273992
## 55     55 0.03261238 0.18987677 0.02521472 0.0007864547 0.02306141 0.0006407488
## 56     56 0.03261052 0.18999016 0.02521135 0.0007972300 0.02364582 0.0006578132
## 57     57 0.03260327 0.19031032 0.02519272 0.0007817686 0.02296486 0.0006358896
## 58     58 0.03261089 0.19006507 0.02519869 0.0007659209 0.02331494 0.0006246004
## 59     59 0.03261517 0.18991092 0.02518804 0.0007686961 0.02343763 0.0006400251
## 60     60 0.03260560 0.19031986 0.02518312 0.0007717041 0.02280514 0.0006269044
## 61     61 0.03258503 0.19129361 0.02516316 0.0007852949 0.02343075 0.0006420171
## 62     62 0.03258588 0.19123250 0.02516381 0.0007926924 0.02284466 0.0006436292
## 63     63 0.03257111 0.19189832 0.02515069 0.0007762863 0.02199510 0.0006330723
## 64     64 0.03256100 0.19245600 0.02514646 0.0007865942 0.02317236 0.0006374292
## 65     65 0.03254998 0.19293934 0.02515268 0.0007919837 0.02292395 0.0006465925
## 66     66 0.03255025 0.19299159 0.02516333 0.0007832775 0.02289653 0.0006329284
## 67     67 0.03254411 0.19331765 0.02515629 0.0007652304 0.02262896 0.0006097431
## 68     68 0.03253289 0.19387044 0.02514096 0.0007666892 0.02294790 0.0006183627
## 69     69 0.03252479 0.19424156 0.02512988 0.0007714158 0.02294470 0.0006242369
## 70     70 0.03251791 0.19454782 0.02513497 0.0007726349 0.02344826 0.0006247216
## 71     71 0.03252288 0.19435740 0.02513742 0.0007817703 0.02363442 0.0006255846
## 72     72 0.03251670 0.19465653 0.02512613 0.0007858251 0.02365387 0.0006206417
## 73     73 0.03251126 0.19493123 0.02513325 0.0007815723 0.02366288 0.0006158133
## 74     74 0.03251022 0.19502016 0.02513308 0.0007688479 0.02348401 0.0006101862
## 75     75 0.03250184 0.19545061 0.02513201 0.0007715226 0.02360648 0.0006145732
## 76     76 0.03250992 0.19510979 0.02513718 0.0007715749 0.02407926 0.0006198025
## 77     77 0.03252202 0.19461914 0.02515128 0.0007820404 0.02464484 0.0006310097
## 78     78 0.03251902 0.19473902 0.02514683 0.0007882468 0.02381499 0.0006352023
## 79     79 0.03249980 0.19559464 0.02513026 0.0007885672 0.02369816 0.0006374617
## 80     80 0.03249390 0.19584469 0.02513062 0.0007818181 0.02332497 0.0006444959
## 81     81 0.03249569 0.19577386 0.02513205 0.0007799144 0.02309096 0.0006431834
## 82     82 0.03249353 0.19591951 0.02513603 0.0007897934 0.02377980 0.0006555678
## 83     83 0.03249619 0.19582865 0.02513634 0.0008021332 0.02385513 0.0006589456
## 84     84 0.03249234 0.19601940 0.02513694 0.0007958498 0.02368722 0.0006561300
## 85     85 0.03249058 0.19612063 0.02514363 0.0007924024 0.02385745 0.0006541214
## 86     86 0.03248392 0.19642923 0.02513816 0.0007842825 0.02364808 0.0006464797
## 87     87 0.03247319 0.19693759 0.02513056 0.0007750023 0.02379821 0.0006407801
## 88     88 0.03246894 0.19713148 0.02512504 0.0007738594 0.02377237 0.0006456344
## 89     89 0.03247160 0.19705435 0.02512687 0.0007722282 0.02375451 0.0006438975
## 90     90 0.03247634 0.19684827 0.02512544 0.0007744665 0.02377566 0.0006465266
## 91     91 0.03247165 0.19708338 0.02512616 0.0007754172 0.02394506 0.0006477233
## 92     92 0.03246893 0.19719213 0.02512729 0.0007647697 0.02394705 0.0006436013
## 93     93 0.03246395 0.19745474 0.02512683 0.0007698467 0.02471194 0.0006544658
## 94     94 0.03246343 0.19750829 0.02512251 0.0007703953 0.02474501 0.0006517236
## 95     95 0.03245853 0.19775077 0.02511543 0.0007698506 0.02489160 0.0006472834
## 96     96 0.03244968 0.19814110 0.02510522 0.0007729884 0.02491682 0.0006505106
## 97     97 0.03244112 0.19852623 0.02509347 0.0007731527 0.02508986 0.0006539841
## 98     98 0.03244202 0.19851605 0.02509713 0.0007766195 0.02541039 0.0006599264
## 99     99 0.03244029 0.19856494 0.02509394 0.0007763590 0.02517093 0.0006594982
## 100   100 0.03242578 0.19921787 0.02508321 0.0007759773 0.02504508 0.0006616697
## 101   101 0.03241853 0.19954239 0.02507774 0.0007758078 0.02518189 0.0006631844
## 102   102 0.03241591 0.19965453 0.02507708 0.0007729732 0.02517536 0.0006635895
## 103   103 0.03240499 0.20015232 0.02507482 0.0007739621 0.02525908 0.0006666102
## 104   104 0.03239982 0.20040189 0.02506789 0.0007649861 0.02520672 0.0006598198
## 105   105 0.03240117 0.20034924 0.02507373 0.0007630631 0.02543303 0.0006598189
## 106   106 0.03239840 0.20048181 0.02507193 0.0007612769 0.02531828 0.0006547925
## 107   107 0.03240315 0.20024726 0.02507623 0.0007630758 0.02517725 0.0006575062
## 108   108 0.03240435 0.20020089 0.02507582 0.0007635573 0.02502667 0.0006602222
## 109   109 0.03239873 0.20048456 0.02507125 0.0007654171 0.02519034 0.0006609220
## 110   110 0.03239459 0.20065839 0.02506665 0.0007677123 0.02476335 0.0006611028
## 111   111 0.03239716 0.20053929 0.02507084 0.0007686129 0.02477107 0.0006616613
## 112   112 0.03239710 0.20054014 0.02506915 0.0007726370 0.02482303 0.0006640473
## 113   113 0.03239941 0.20044980 0.02507223 0.0007748712 0.02513380 0.0006668409
## 114   114 0.03240234 0.20031797 0.02507529 0.0007735815 0.02505888 0.0006645252
## 115   115 0.03239845 0.20049766 0.02507418 0.0007672455 0.02488664 0.0006612124
## 116   116 0.03239582 0.20061452 0.02506970 0.0007696846 0.02493590 0.0006630723
## 117   117 0.03239498 0.20065914 0.02506741 0.0007719129 0.02510054 0.0006637116
## 118   118 0.03239517 0.20064767 0.02506808 0.0007733197 0.02507145 0.0006639964
## 119   119 0.03239868 0.20049399 0.02507132 0.0007750974 0.02505414 0.0006636076
## 120   120 0.03240049 0.20040514 0.02507188 0.0007687080 0.02492257 0.0006569604
## 121   121 0.03239859 0.20049541 0.02507214 0.0007692483 0.02497651 0.0006584755
## 122   122 0.03239859 0.20049254 0.02507250 0.0007697507 0.02495424 0.0006582021
## 123   123 0.03240069 0.20040237 0.02507444 0.0007664690 0.02492546 0.0006548281
## 124   124 0.03240037 0.20042131 0.02507205 0.0007687254 0.02502678 0.0006554686
## 125   125 0.03240377 0.20026253 0.02507517 0.0007683187 0.02501472 0.0006547100
## 126   126 0.03240443 0.20023559 0.02507431 0.0007652942 0.02497196 0.0006546665
## 127   127 0.03240335 0.20028453 0.02507365 0.0007657492 0.02497180 0.0006549634
## 128   128 0.03240499 0.20021101 0.02507490 0.0007648377 0.02495368 0.0006537676
## 129   129 0.03240521 0.20019865 0.02507561 0.0007641579 0.02499913 0.0006546046
## 130   130 0.03240618 0.20015443 0.02507559 0.0007641984 0.02501429 0.0006540456
## 131   131 0.03240604 0.20015733 0.02507552 0.0007640321 0.02503097 0.0006538033
## 132   132 0.03240666 0.20012907 0.02507585 0.0007640936 0.02504140 0.0006532104
## 133   133 0.03240726 0.20009977 0.02507666 0.0007639656 0.02504310 0.0006536631
## 134   134 0.03240786 0.20007536 0.02507707 0.0007641164 0.02504665 0.0006529795
## 135   135 0.03240758 0.20008570 0.02507672 0.0007642833 0.02504061 0.0006528860
## 136   136 0.03240816 0.20006010 0.02507680 0.0007643459 0.02505548 0.0006530196
## 137   137 0.03240795 0.20006894 0.02507642 0.0007643151 0.02506310 0.0006529651
## 138   138 0.03240803 0.20006564 0.02507649 0.0007645271 0.02506156 0.0006530139
## [1] "Best Model"
##     nvmax
## 110   110

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.0965368703  2.095697e+00  2.097377e+00
## PC1         -0.0009753762 -1.154814e-03 -7.959383e-04
## PC2          0.0012035314  1.005546e-03  1.401517e-03
## PC3         -0.0002767035 -4.948937e-04 -5.851338e-05
## PC4          0.0001683756 -5.192600e-05  3.886772e-04
## PC5          0.0006826866  4.583826e-04  9.069907e-04
## PC7         -0.0005099621 -7.392059e-04 -2.807183e-04
## PC8         -0.0002067595 -4.352876e-04  2.176856e-05
## PC9         -0.0002314497 -4.702156e-04  7.316330e-06
## PC10         0.0002611245  1.863521e-05  5.036139e-04
## PC11        -0.0013627613 -1.610508e-03 -1.115014e-03
## PC12         0.0003546051  1.072602e-04  6.019500e-04
## PC13         0.0004167756  1.611092e-04  6.724419e-04
## PC14         0.0015001118  1.246140e-03  1.754083e-03
## PC15        -0.0003584491 -6.189019e-04 -9.799628e-05
## PC16         0.0007904896  5.243205e-04  1.056659e-03
## PC17        -0.0001736328 -4.484116e-04  1.011461e-04
## PC18         0.0004064056  1.241334e-04  6.886777e-04
## PC19        -0.0003290534 -6.193746e-04 -3.873210e-05
## PC20         0.0009086458  6.043066e-04  1.212985e-03
## PC21        -0.0010243370 -1.335834e-03 -7.128398e-04
## PC22         0.0040105844  3.685930e-03  4.335238e-03
## PC23        -0.0005271298 -1.179198e-03  1.249380e-04
## PC24        -0.0014519611 -2.177307e-03 -7.266149e-04
## PC25         0.0009727363  2.537141e-04  1.691759e-03
## PC26        -0.0004758081 -1.200151e-03  2.485353e-04
## PC27        -0.0003703819 -1.098387e-03  3.576233e-04
## PC28         0.0007845835  5.403101e-05  1.515136e-03
## PC29        -0.0007201114 -1.451054e-03  1.083162e-05
## PC30         0.0006503129 -8.115961e-05  1.381786e-03
## PC31        -0.0003568879 -1.091877e-03  3.781014e-04
## PC32         0.0013278584  5.974717e-04  2.058245e-03
## PC36         0.0005845222 -1.552992e-04  1.324344e-03
## PC37        -0.0003920529 -1.130908e-03  3.468022e-04
## PC38         0.0004271293 -3.206515e-04  1.174910e-03
## PC39         0.0003401916 -4.101892e-04  1.090572e-03
## PC40         0.0003206294 -4.231597e-04  1.064418e-03
## PC41        -0.0007005179 -1.441132e-03  4.009582e-05
## PC42        -0.0005636011 -1.309979e-03  1.827765e-04
## PC43         0.0007001334 -5.084190e-05  1.451109e-03
## PC44        -0.0005123994 -1.259461e-03  2.346622e-04
## PC45         0.0012269099  4.783201e-04  1.975500e-03
## PC46         0.0015047325  7.527743e-04  2.256691e-03
## PC47         0.0005506639 -2.045439e-04  1.305872e-03
## PC50        -0.0008826750 -1.638728e-03 -1.266224e-04
## PC51         0.0005462567 -2.024028e-04  1.294916e-03
## PC52         0.0004098906 -3.434558e-04  1.163237e-03
## PC53        -0.0005576225 -1.319556e-03  2.043114e-04
## PC57         0.0004296186 -3.321397e-04  1.191377e-03
## PC58        -0.0005527647 -1.318095e-03  2.125653e-04
## PC59         0.0012702222  5.016661e-04  2.038778e-03
## PC60        -0.0008017669 -1.572844e-03 -3.068976e-05
## PC61        -0.0002846938 -1.062817e-03  4.934299e-04
## PC62        -0.0005640088 -1.331659e-03  2.036412e-04
## PC63         0.0006734582 -1.007575e-04  1.447674e-03
## PC65         0.0008926788  1.148954e-04  1.670462e-03
## PC67         0.0004050070 -3.737709e-04  1.183785e-03
## PC68         0.0008858627  1.080092e-04  1.663716e-03
## PC69        -0.0012529641 -2.032539e-03 -4.733894e-04
## PC71        -0.0008268591 -1.608376e-03 -4.534231e-05
## PC72         0.0008356291  5.699689e-05  1.614261e-03
## PC73         0.0005921479 -1.864336e-04  1.370729e-03
## PC75         0.0002538745 -5.370266e-04  1.044776e-03
## PC76         0.0016080450  8.253431e-04  2.390747e-03
## PC78        -0.0003849174 -1.174979e-03  4.051447e-04
## PC79         0.0008139782  2.750905e-05  1.600447e-03
## PC80        -0.0003822184 -1.170784e-03  4.063477e-04
## PC81        -0.0005777761 -1.373533e-03  2.179804e-04
## PC82        -0.0011160961 -1.905057e-03 -3.271354e-04
## PC83         0.0011666680  3.769274e-04  1.956409e-03
## PC85         0.0004159382 -3.784671e-04  1.210344e-03
## PC86         0.0005542520 -2.483770e-04  1.356881e-03
## PC87         0.0005841709 -2.138372e-04  1.382179e-03
## PC88         0.0004275564 -3.716779e-04  1.226791e-03
## PC89         0.0002772053 -5.162544e-04  1.070665e-03
## PC90        -0.0023723844 -3.170924e-03 -1.573844e-03
## PC91         0.0003996476 -3.990746e-04  1.198370e-03
## PC92         0.0002769611 -5.261904e-04  1.080113e-03
## PC94         0.0005395375 -2.654395e-04  1.344514e-03
## PC96        -0.0008826530 -1.691488e-03 -7.381836e-05
## PC97        -0.0003003734 -1.108344e-03  5.075967e-04
## PC98         0.0005875189 -2.247114e-04  1.399749e-03
## PC101       -0.0009914517 -1.799574e-03 -1.833293e-04
## PC102        0.0014484484  6.327960e-04  2.264101e-03
## PC104        0.0006386730 -1.761698e-04  1.453516e-03
## PC105        0.0002752159 -5.431010e-04  1.093533e-03
## PC106        0.0009514398  1.319122e-04  1.770967e-03
## PC107        0.0003922184 -4.250027e-04  1.209439e-03
## PC108       -0.0012110732 -2.030131e-03 -3.920157e-04
## PC109       -0.0003256071 -1.145878e-03  4.946643e-04
## PC110       -0.0004121828 -1.231737e-03  4.073715e-04
## PC111       -0.0008118751 -1.633126e-03  9.376251e-06
## PC112        0.0009103789  8.847136e-05  1.732287e-03
## PC114       -0.0003074862 -1.133391e-03  5.184185e-04
## PC115       -0.0006585795 -1.481574e-03  1.644146e-04
## PC117       -0.0016856941 -2.512477e-03 -8.589110e-04
## PC118       -0.0009465493 -1.775062e-03 -1.180364e-04
## PC119       -0.0008246670 -1.659911e-03  1.057664e-05
## PC120       -0.0005478879 -1.379798e-03  2.840228e-04
## PC121        0.0004080198 -4.288019e-04  1.244842e-03
## PC123       -0.0007145140 -1.551468e-03  1.224401e-04
## PC126        0.0010369835  1.990034e-04  1.874964e-03
## PC127        0.0010668332  2.360861e-04  1.897580e-03
## PC128       -0.0004767245 -1.320809e-03  3.673603e-04
## PC129       -0.0007336509 -1.574362e-03  1.070599e-04
## PC131        0.0011602279  3.167657e-04  2.003690e-03
## PC132        0.0015102884  6.702975e-04  2.350279e-03
## PC133       -0.0004892188 -1.335368e-03  3.569302e-04
## PC134       -0.0002865181 -1.131822e-03  5.587861e-04
## PC136        0.0006394666 -2.115833e-04  1.490516e-03
## PC138        0.0011348902  2.795909e-04  1.990190e-03

Test

# Evaluate the forward-selection model on the held-out test set.
# NOTE(review): `transformation = t` -- at this point `t` is the data frame
# of flagged high-Cook's-D training rows created earlier (it shadows
# base::t()), which is unlikely to be the transformation object
# test.model() expects. Confirm the intended value (e.g. the inverse of the
# log transform applied to the output variable).
if (algo.forward.caret == TRUE){
    test.model(model=model.forward, test=data.test
             ,method = 'leapForward',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.033   2.086   2.098   2.097   2.110   2.159 
## [1] "leapForward  Test MSE: 0.00103301010705246"

Backward Elimination with CV

Train

# Backward elimination (leaps via caret), tuned by cross-validation.
# isTRUE() is safer than `== TRUE`: it yields FALSE (instead of an error
# inside `if`) when the flag is NA or not a length-1 logical.
if (isTRUE(algo.backward.caret)) {
  set.seed(1)  # same seed as forward selection for comparable CV folds
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "leapBackward"
                                   ,feature.names =  feature.names)
  model.backward = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.03467963 0.08228409 0.02689668 0.0005837199 0.01755905 0.0002999811
## 2       2 0.03442479 0.09584815 0.02672288 0.0006696668 0.01801778 0.0003448861
## 3       3 0.03415987 0.11016571 0.02652062 0.0006149829 0.02064252 0.0004941685
## 4       4 0.03374386 0.13134494 0.02610610 0.0006481791 0.01901965 0.0005078870
## 5       5 0.03335768 0.15117284 0.02583748 0.0006824675 0.02092203 0.0005513551
## 6       6 0.03333923 0.15191631 0.02584422 0.0006931797 0.01936718 0.0005251475
## 7       7 0.03327598 0.15511291 0.02581860 0.0006641517 0.01899973 0.0005259697
## 8       8 0.03317918 0.15995957 0.02575309 0.0006320762 0.01732750 0.0005018923
## 9       9 0.03307505 0.16508344 0.02567850 0.0006160087 0.01698117 0.0004669829
## 10     10 0.03289696 0.17392287 0.02551290 0.0005889235 0.01709557 0.0004697173
## 11     11 0.03291761 0.17299007 0.02548476 0.0006006470 0.01761570 0.0004844542
## 12     12 0.03291500 0.17309136 0.02547065 0.0006001467 0.01673993 0.0004830773
## 13     13 0.03289015 0.17434453 0.02546783 0.0006426586 0.01637780 0.0004957932
## 14     14 0.03289115 0.17437202 0.02545558 0.0007018654 0.01725684 0.0005217945
## 15     15 0.03284130 0.17692317 0.02541656 0.0006895316 0.01819374 0.0005112121
## 16     16 0.03285292 0.17633756 0.02542029 0.0006849775 0.01755231 0.0004742705
## 17     17 0.03288050 0.17502441 0.02542037 0.0007001761 0.01707427 0.0005108211
## 18     18 0.03284653 0.17673743 0.02538417 0.0006608252 0.01623923 0.0004841676
## 19     19 0.03281035 0.17853299 0.02536574 0.0006804638 0.01691988 0.0004861180
## 20     20 0.03282852 0.17774842 0.02536737 0.0006985725 0.01802704 0.0005441927
## 21     21 0.03282387 0.17807758 0.02536963 0.0007020519 0.01899034 0.0005247229
## 22     22 0.03281721 0.17835409 0.02535781 0.0007478792 0.01765618 0.0005565969
## 23     23 0.03281606 0.17850053 0.02536217 0.0007643369 0.01726868 0.0005585367
## 24     24 0.03277458 0.18057033 0.02532659 0.0007737236 0.01810310 0.0005698552
## 25     25 0.03275424 0.18161604 0.02529412 0.0007729785 0.01852917 0.0005599684
## 26     26 0.03274555 0.18205508 0.02527989 0.0007743612 0.01720570 0.0005599813
## 27     27 0.03275617 0.18160642 0.02531297 0.0007525466 0.01736412 0.0005702312
## 28     28 0.03274722 0.18212918 0.02531023 0.0007550846 0.01780786 0.0005603116
## 29     29 0.03274248 0.18242858 0.02530561 0.0007520921 0.01870840 0.0005649725
## 30     30 0.03276122 0.18170121 0.02531808 0.0007626415 0.02145986 0.0005924937
## 31     31 0.03276700 0.18149081 0.02530708 0.0007718902 0.02159776 0.0006159969
## 32     32 0.03276724 0.18158825 0.02530714 0.0007508678 0.02098200 0.0005942022
## 33     33 0.03275745 0.18206617 0.02530168 0.0007722228 0.02075349 0.0006026849
## 34     34 0.03277003 0.18154548 0.02529327 0.0007861449 0.02004310 0.0006166167
## 35     35 0.03275854 0.18209049 0.02528278 0.0007758899 0.01936050 0.0006102044
## 36     36 0.03275283 0.18247046 0.02528710 0.0007772115 0.01974863 0.0006000074
## 37     37 0.03273280 0.18344212 0.02527515 0.0007851491 0.02020206 0.0006189663
## 38     38 0.03273120 0.18347258 0.02529402 0.0007659843 0.01880030 0.0006041981
## 39     39 0.03272612 0.18381833 0.02529317 0.0007709036 0.01920706 0.0005886535
## 40     40 0.03274881 0.18275508 0.02531486 0.0007545026 0.01847049 0.0005809108
## 41     41 0.03274517 0.18300083 0.02530796 0.0007733138 0.01867483 0.0005927297
## 42     42 0.03273334 0.18360593 0.02530827 0.0007869220 0.01914135 0.0005976127
## 43     43 0.03272596 0.18397989 0.02530937 0.0007812702 0.01921906 0.0006080321
## 44     44 0.03272226 0.18421630 0.02531490 0.0007869013 0.02027323 0.0006170151
## 45     45 0.03269645 0.18544399 0.02529149 0.0007960764 0.02066754 0.0006197800
## 46     46 0.03269297 0.18563802 0.02528636 0.0007819934 0.01976476 0.0006146125
## 47     47 0.03268554 0.18608130 0.02529066 0.0007865328 0.02077976 0.0006225590
## 48     48 0.03266589 0.18705427 0.02527464 0.0007731584 0.02069157 0.0006127158
## 49     49 0.03264013 0.18832802 0.02524665 0.0007636926 0.02184782 0.0006215270
## 50     50 0.03262778 0.18894371 0.02521976 0.0007605484 0.02251121 0.0006210315
## 51     51 0.03262757 0.18902082 0.02522083 0.0007627160 0.02276121 0.0006188413
## 52     52 0.03262166 0.18933341 0.02521727 0.0007484738 0.02278048 0.0006090828
## 53     53 0.03261484 0.18967629 0.02521303 0.0007582230 0.02247997 0.0006091574
## 54     54 0.03260950 0.18998952 0.02520686 0.0007862914 0.02306254 0.0006330481
## 55     55 0.03261712 0.18965289 0.02521829 0.0007981025 0.02309736 0.0006478188
## 56     56 0.03261504 0.18977405 0.02521836 0.0007912740 0.02325657 0.0006444193
## 57     57 0.03260563 0.19019966 0.02519422 0.0007786817 0.02276605 0.0006330119
## 58     58 0.03260971 0.19011302 0.02519044 0.0007662001 0.02333873 0.0006236315
## 59     59 0.03261659 0.18983970 0.02519206 0.0007642888 0.02318823 0.0006217051
## 60     60 0.03260440 0.19036368 0.02517719 0.0007718755 0.02282656 0.0006264902
## 61     61 0.03259218 0.19095432 0.02516626 0.0007821174 0.02307645 0.0006400026
## 62     62 0.03259303 0.19089492 0.02516661 0.0007900837 0.02252547 0.0006416166
## 63     63 0.03258217 0.19142166 0.02515979 0.0007718461 0.02203288 0.0006301853
## 64     64 0.03256364 0.19234287 0.02515018 0.0007856301 0.02318622 0.0006364208
## 65     65 0.03255628 0.19268552 0.02515685 0.0007904710 0.02308603 0.0006461861
## 66     66 0.03255017 0.19299142 0.02516438 0.0007833296 0.02289637 0.0006317919
## 67     67 0.03253868 0.19358761 0.02514991 0.0007654475 0.02272480 0.0006101303
## 68     68 0.03252983 0.19403305 0.02513689 0.0007669214 0.02300828 0.0006175003
## 69     69 0.03251754 0.19459792 0.02512558 0.0007722772 0.02305476 0.0006234664
## 70     70 0.03251374 0.19476325 0.02513282 0.0007733999 0.02350914 0.0006256036
## 71     71 0.03251851 0.19456018 0.02513564 0.0007831339 0.02363440 0.0006259255
## 72     72 0.03250841 0.19505513 0.02512635 0.0007881768 0.02377957 0.0006196110
## 73     73 0.03251057 0.19496552 0.02513355 0.0007819520 0.02370406 0.0006157345
## 74     74 0.03250952 0.19505437 0.02513277 0.0007692197 0.02352432 0.0006102482
## 75     75 0.03250184 0.19545061 0.02513201 0.0007715226 0.02360648 0.0006145732
## 76     76 0.03251050 0.19508395 0.02514031 0.0007712470 0.02404834 0.0006190926
## 77     77 0.03251979 0.19470836 0.02514972 0.0007812505 0.02450920 0.0006301492
## 78     78 0.03251731 0.19480519 0.02514165 0.0007872251 0.02364128 0.0006346228
## 79     79 0.03250661 0.19526827 0.02513272 0.0007804377 0.02308486 0.0006356887
## 80     80 0.03250361 0.19538470 0.02514052 0.0007756830 0.02287733 0.0006384733
## 81     81 0.03250243 0.19548840 0.02513783 0.0007809905 0.02329755 0.0006452859
## 82     82 0.03249485 0.19587348 0.02513612 0.0007899386 0.02382548 0.0006538916
## 83     83 0.03249926 0.19568511 0.02513794 0.0007977782 0.02357484 0.0006555270
## 84     84 0.03249764 0.19578373 0.02514249 0.0007905784 0.02335865 0.0006491801
## 85     85 0.03249020 0.19613918 0.02514446 0.0007926077 0.02387080 0.0006533251
## 86     86 0.03248392 0.19642923 0.02513816 0.0007842825 0.02364808 0.0006464797
## 87     87 0.03247319 0.19693759 0.02513056 0.0007750023 0.02379821 0.0006407801
## 88     88 0.03246894 0.19713148 0.02512504 0.0007738594 0.02377237 0.0006456344
## 89     89 0.03247160 0.19705435 0.02512687 0.0007722282 0.02375451 0.0006438975
## 90     90 0.03247634 0.19684827 0.02512544 0.0007744665 0.02377566 0.0006465266
## 91     91 0.03247165 0.19708338 0.02512616 0.0007754172 0.02394506 0.0006477233
## 92     92 0.03246893 0.19719213 0.02512729 0.0007647697 0.02394705 0.0006436013
## 93     93 0.03246443 0.19745046 0.02512434 0.0007639035 0.02452758 0.0006459812
## 94     94 0.03246587 0.19740582 0.02512102 0.0007673330 0.02469969 0.0006449982
## 95     95 0.03245853 0.19775077 0.02511543 0.0007698506 0.02489160 0.0006472834
## 96     96 0.03245107 0.19808767 0.02510572 0.0007747480 0.02498954 0.0006512270
## 97     97 0.03244268 0.19848268 0.02509407 0.0007751146 0.02514835 0.0006548558
## 98     98 0.03244221 0.19852521 0.02509709 0.0007768580 0.02539823 0.0006598683
## 99     99 0.03244029 0.19856494 0.02509394 0.0007763590 0.02517093 0.0006594982
## 100   100 0.03242578 0.19921787 0.02508321 0.0007759773 0.02504508 0.0006616697
## 101   101 0.03241853 0.19954239 0.02507774 0.0007758078 0.02518189 0.0006631844
## 102   102 0.03241591 0.19965453 0.02507708 0.0007729732 0.02517536 0.0006635895
## 103   103 0.03240499 0.20015232 0.02507482 0.0007739621 0.02525908 0.0006666102
## 104   104 0.03239982 0.20040189 0.02506789 0.0007649861 0.02520672 0.0006598198
## 105   105 0.03240117 0.20034924 0.02507373 0.0007630631 0.02543303 0.0006598189
## 106   106 0.03239840 0.20048181 0.02507193 0.0007612769 0.02531828 0.0006547925
## 107   107 0.03240175 0.20031187 0.02507593 0.0007638748 0.02515644 0.0006575391
## 108   108 0.03240294 0.20026608 0.02507538 0.0007644001 0.02500794 0.0006602883
## 109   109 0.03239873 0.20048456 0.02507125 0.0007654171 0.02519034 0.0006609220
## 110   110 0.03239459 0.20065839 0.02506665 0.0007677123 0.02476335 0.0006611028
## 111   111 0.03239797 0.20049773 0.02507160 0.0007696446 0.02482828 0.0006627744
## 112   112 0.03239876 0.20046701 0.02507007 0.0007747777 0.02492520 0.0006654006
## 113   113 0.03239941 0.20044980 0.02507223 0.0007748712 0.02513380 0.0006668409
## 114   114 0.03240234 0.20031797 0.02507529 0.0007735815 0.02505888 0.0006645252
## 115   115 0.03239845 0.20049766 0.02507418 0.0007672455 0.02488664 0.0006612124
## 116   116 0.03239582 0.20061452 0.02506970 0.0007696846 0.02493590 0.0006630723
## 117   117 0.03239498 0.20065914 0.02506741 0.0007719129 0.02510054 0.0006637116
## 118   118 0.03239517 0.20064767 0.02506808 0.0007733197 0.02507145 0.0006639964
## 119   119 0.03239868 0.20049399 0.02507132 0.0007750974 0.02505414 0.0006636076
## 120   120 0.03240049 0.20040514 0.02507188 0.0007687080 0.02492257 0.0006569604
## 121   121 0.03239859 0.20049541 0.02507214 0.0007692483 0.02497651 0.0006584755
## 122   122 0.03239859 0.20049254 0.02507250 0.0007697507 0.02495424 0.0006582021
## 123   123 0.03240069 0.20040237 0.02507444 0.0007664690 0.02492546 0.0006548281
## 124   124 0.03240037 0.20042131 0.02507205 0.0007687254 0.02502678 0.0006554686
## 125   125 0.03240377 0.20026253 0.02507517 0.0007683187 0.02501472 0.0006547100
## 126   126 0.03240443 0.20023559 0.02507431 0.0007652942 0.02497196 0.0006546665
## 127   127 0.03240335 0.20028453 0.02507365 0.0007657492 0.02497180 0.0006549634
## 128   128 0.03240499 0.20021101 0.02507490 0.0007648377 0.02495368 0.0006537676
## 129   129 0.03240521 0.20019865 0.02507561 0.0007641579 0.02499913 0.0006546046
## 130   130 0.03240618 0.20015443 0.02507559 0.0007641984 0.02501429 0.0006540456
## 131   131 0.03240604 0.20015733 0.02507552 0.0007640321 0.02503097 0.0006538033
## 132   132 0.03240666 0.20012907 0.02507585 0.0007640936 0.02504140 0.0006532104
## 133   133 0.03240726 0.20009977 0.02507666 0.0007639656 0.02504310 0.0006536631
## 134   134 0.03240786 0.20007536 0.02507707 0.0007641164 0.02504665 0.0006529795
## 135   135 0.03240758 0.20008570 0.02507672 0.0007642833 0.02504061 0.0006528860
## 136   136 0.03240816 0.20006010 0.02507680 0.0007643459 0.02505548 0.0006530196
## 137   137 0.03240795 0.20006894 0.02507642 0.0007643151 0.02506310 0.0006529651
## 138   138 0.03240803 0.20006564 0.02507649 0.0007645271 0.02506156 0.0006530139
## [1] "Best Model"
##     nvmax
## 110   110

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.0965368703  2.095697e+00  2.097377e+00
## PC1         -0.0009753762 -1.154814e-03 -7.959383e-04
## PC2          0.0012035314  1.005546e-03  1.401517e-03
## PC3         -0.0002767035 -4.948937e-04 -5.851338e-05
## PC4          0.0001683756 -5.192600e-05  3.886772e-04
## PC5          0.0006826866  4.583826e-04  9.069907e-04
## PC7         -0.0005099621 -7.392059e-04 -2.807183e-04
## PC8         -0.0002067595 -4.352876e-04  2.176856e-05
## PC9         -0.0002314497 -4.702156e-04  7.316330e-06
## PC10         0.0002611245  1.863521e-05  5.036139e-04
## PC11        -0.0013627613 -1.610508e-03 -1.115014e-03
## PC12         0.0003546051  1.072602e-04  6.019500e-04
## PC13         0.0004167756  1.611092e-04  6.724419e-04
## PC14         0.0015001118  1.246140e-03  1.754083e-03
## PC15        -0.0003584491 -6.189019e-04 -9.799628e-05
## PC16         0.0007904896  5.243205e-04  1.056659e-03
## PC17        -0.0001736328 -4.484116e-04  1.011461e-04
## PC18         0.0004064056  1.241334e-04  6.886777e-04
## PC19        -0.0003290534 -6.193746e-04 -3.873210e-05
## PC20         0.0009086458  6.043066e-04  1.212985e-03
## PC21        -0.0010243370 -1.335834e-03 -7.128398e-04
## PC22         0.0040105844  3.685930e-03  4.335238e-03
## PC23        -0.0005271298 -1.179198e-03  1.249380e-04
## PC24        -0.0014519611 -2.177307e-03 -7.266149e-04
## PC25         0.0009727363  2.537141e-04  1.691759e-03
## PC26        -0.0004758081 -1.200151e-03  2.485353e-04
## PC27        -0.0003703819 -1.098387e-03  3.576233e-04
## PC28         0.0007845835  5.403101e-05  1.515136e-03
## PC29        -0.0007201114 -1.451054e-03  1.083162e-05
## PC30         0.0006503129 -8.115961e-05  1.381786e-03
## PC31        -0.0003568879 -1.091877e-03  3.781014e-04
## PC32         0.0013278584  5.974717e-04  2.058245e-03
## PC36         0.0005845222 -1.552992e-04  1.324344e-03
## PC37        -0.0003920529 -1.130908e-03  3.468022e-04
## PC38         0.0004271293 -3.206515e-04  1.174910e-03
## PC39         0.0003401916 -4.101892e-04  1.090572e-03
## PC40         0.0003206294 -4.231597e-04  1.064418e-03
## PC41        -0.0007005179 -1.441132e-03  4.009582e-05
## PC42        -0.0005636011 -1.309979e-03  1.827765e-04
## PC43         0.0007001334 -5.084190e-05  1.451109e-03
## PC44        -0.0005123994 -1.259461e-03  2.346622e-04
## PC45         0.0012269099  4.783201e-04  1.975500e-03
## PC46         0.0015047325  7.527743e-04  2.256691e-03
## PC47         0.0005506639 -2.045439e-04  1.305872e-03
## PC50        -0.0008826750 -1.638728e-03 -1.266224e-04
## PC51         0.0005462567 -2.024028e-04  1.294916e-03
## PC52         0.0004098906 -3.434558e-04  1.163237e-03
## PC53        -0.0005576225 -1.319556e-03  2.043114e-04
## PC57         0.0004296186 -3.321397e-04  1.191377e-03
## PC58        -0.0005527647 -1.318095e-03  2.125653e-04
## PC59         0.0012702222  5.016661e-04  2.038778e-03
## PC60        -0.0008017669 -1.572844e-03 -3.068976e-05
## PC61        -0.0002846938 -1.062817e-03  4.934299e-04
## PC62        -0.0005640088 -1.331659e-03  2.036412e-04
## PC63         0.0006734582 -1.007575e-04  1.447674e-03
## PC65         0.0008926788  1.148954e-04  1.670462e-03
## PC67         0.0004050070 -3.737709e-04  1.183785e-03
## PC68         0.0008858627  1.080092e-04  1.663716e-03
## PC69        -0.0012529641 -2.032539e-03 -4.733894e-04
## PC71        -0.0008268591 -1.608376e-03 -4.534231e-05
## PC72         0.0008356291  5.699689e-05  1.614261e-03
## PC73         0.0005921479 -1.864336e-04  1.370729e-03
## PC75         0.0002538745 -5.370266e-04  1.044776e-03
## PC76         0.0016080450  8.253431e-04  2.390747e-03
## PC78        -0.0003849174 -1.174979e-03  4.051447e-04
## PC79         0.0008139782  2.750905e-05  1.600447e-03
## PC80        -0.0003822184 -1.170784e-03  4.063477e-04
## PC81        -0.0005777761 -1.373533e-03  2.179804e-04
## PC82        -0.0011160961 -1.905057e-03 -3.271354e-04
## PC83         0.0011666680  3.769274e-04  1.956409e-03
## PC85         0.0004159382 -3.784671e-04  1.210344e-03
## PC86         0.0005542520 -2.483770e-04  1.356881e-03
## PC87         0.0005841709 -2.138372e-04  1.382179e-03
## PC88         0.0004275564 -3.716779e-04  1.226791e-03
## PC89         0.0002772053 -5.162544e-04  1.070665e-03
## PC90        -0.0023723844 -3.170924e-03 -1.573844e-03
## PC91         0.0003996476 -3.990746e-04  1.198370e-03
## PC92         0.0002769611 -5.261904e-04  1.080113e-03
## PC94         0.0005395375 -2.654395e-04  1.344514e-03
## PC96        -0.0008826530 -1.691488e-03 -7.381836e-05
## PC97        -0.0003003734 -1.108344e-03  5.075967e-04
## PC98         0.0005875189 -2.247114e-04  1.399749e-03
## PC101       -0.0009914517 -1.799574e-03 -1.833293e-04
## PC102        0.0014484484  6.327960e-04  2.264101e-03
## PC104        0.0006386730 -1.761698e-04  1.453516e-03
## PC105        0.0002752159 -5.431010e-04  1.093533e-03
## PC106        0.0009514398  1.319122e-04  1.770967e-03
## PC107        0.0003922184 -4.250027e-04  1.209439e-03
## PC108       -0.0012110732 -2.030131e-03 -3.920157e-04
## PC109       -0.0003256071 -1.145878e-03  4.946643e-04
## PC110       -0.0004121828 -1.231737e-03  4.073715e-04
## PC111       -0.0008118751 -1.633126e-03  9.376251e-06
## PC112        0.0009103789  8.847136e-05  1.732287e-03
## PC114       -0.0003074862 -1.133391e-03  5.184185e-04
## PC115       -0.0006585795 -1.481574e-03  1.644146e-04
## PC117       -0.0016856941 -2.512477e-03 -8.589110e-04
## PC118       -0.0009465493 -1.775062e-03 -1.180364e-04
## PC119       -0.0008246670 -1.659911e-03  1.057664e-05
## PC120       -0.0005478879 -1.379798e-03  2.840228e-04
## PC121        0.0004080198 -4.288019e-04  1.244842e-03
## PC123       -0.0007145140 -1.551468e-03  1.224401e-04
## PC126        0.0010369835  1.990034e-04  1.874964e-03
## PC127        0.0010668332  2.360861e-04  1.897580e-03
## PC128       -0.0004767245 -1.320809e-03  3.673603e-04
## PC129       -0.0007336509 -1.574362e-03  1.070599e-04
## PC131        0.0011602279  3.167657e-04  2.003690e-03
## PC132        0.0015102884  6.702975e-04  2.350279e-03
## PC133       -0.0004892188 -1.335368e-03  3.569302e-04
## PC134       -0.0002865181 -1.131822e-03  5.587861e-04
## PC136        0.0006394666 -2.115833e-04  1.490516e-03
## PC138        0.0011348902  2.795909e-04  1.990190e-03

Test

# Evaluate the backward-elimination (leapBackward) model on the test set.
# isTRUE() is NA-safe, and the model/test arguments are named to match the
# calling convention used by the forward-selection test chunk.
if (isTRUE(algo.backward.caret)) {
  test.model(model = model.backward, test = data.test,
             method = "leapBackward", subopt = NULL,
             formula = formula,
             feature.names = feature.names, label.names = label.names,
             id = id,
             draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.033   2.086   2.098   2.097   2.110   2.159 
## [1] "leapBackward  Test MSE: 0.00103301010705246"

Stepwise Selection with CV

Train

# Train a stepwise-selection (leapSeq) model via caret CV.
# isTRUE() avoids the error `if (NA == TRUE)` would raise if the flag is NA.
if (isTRUE(algo.stepwise.caret)) {
  # Same seed as the other selection chunks so CV folds are comparable.
  set.seed(1)
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "leapSeq",
                                    feature.names = feature.names)
  # Keep the fitted model and the run identifier for the test chunk below.
  model.stepwise <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.03467963 0.08228409 0.02689668 0.0005837199 0.01755905 0.0002999811
## 2       2 0.03442479 0.09584815 0.02672288 0.0006696668 0.01801778 0.0003448861
## 3       3 0.03415987 0.11016571 0.02652062 0.0006149829 0.02064252 0.0004941685
## 4       4 0.03408152 0.11289736 0.02633266 0.0009629997 0.04348569 0.0007485688
## 5       5 0.03503216 0.06308580 0.02720604 0.0013437177 0.05333940 0.0009085872
## 6       6 0.03333923 0.15191631 0.02584422 0.0006931797 0.01936718 0.0005251475
## 7       7 0.03327598 0.15511291 0.02581860 0.0006641517 0.01899973 0.0005259697
## 8       8 0.03317918 0.15995957 0.02575309 0.0006320762 0.01732750 0.0005018923
## 9       9 0.03307505 0.16508344 0.02567850 0.0006160087 0.01698117 0.0004669829
## 10     10 0.03289696 0.17392287 0.02551290 0.0005889235 0.01709557 0.0004697173
## 11     11 0.03291761 0.17299007 0.02548476 0.0006006470 0.01761570 0.0004844542
## 12     12 0.03291500 0.17309136 0.02547065 0.0006001467 0.01673993 0.0004830773
## 13     13 0.03300691 0.16796378 0.02554539 0.0007434112 0.03151237 0.0005891436
## 14     14 0.03289115 0.17437202 0.02545558 0.0007018654 0.01725684 0.0005217945
## 15     15 0.03284130 0.17692317 0.02541656 0.0006895316 0.01819374 0.0005112121
## 16     16 0.03308062 0.16512590 0.02561277 0.0013482107 0.03814246 0.0009796056
## 17     17 0.03288050 0.17502441 0.02542037 0.0007001761 0.01707427 0.0005108211
## 18     18 0.03284653 0.17673743 0.02538417 0.0006608252 0.01623923 0.0004841676
## 19     19 0.03281035 0.17853299 0.02536574 0.0006804638 0.01691988 0.0004861180
## 20     20 0.03282852 0.17774842 0.02536737 0.0006985725 0.01802704 0.0005441927
## 21     21 0.03282387 0.17807758 0.02536963 0.0007020519 0.01899034 0.0005247229
## 22     22 0.03281721 0.17835409 0.02535781 0.0007478792 0.01765618 0.0005565969
## 23     23 0.03279416 0.17950156 0.02533902 0.0007710354 0.01681905 0.0005626326
## 24     24 0.03276334 0.18107409 0.02532165 0.0007719407 0.01813685 0.0005670953
## 25     25 0.03271591 0.18315678 0.02528615 0.0007809654 0.01798981 0.0005536892
## 26     26 0.03274555 0.18205508 0.02527989 0.0007743612 0.01720570 0.0005599813
## 27     27 0.03275617 0.18160642 0.02531297 0.0007525466 0.01736412 0.0005702312
## 28     28 0.03274722 0.18212918 0.02531023 0.0007550846 0.01780786 0.0005603116
## 29     29 0.03274907 0.18210413 0.02530666 0.0007472887 0.01808281 0.0005646727
## 30     30 0.03273195 0.18323515 0.02528217 0.0006921030 0.02197366 0.0005339153
## 31     31 0.03281139 0.17932949 0.02537001 0.0007478946 0.01851996 0.0006250217
## 32     32 0.03272805 0.18313790 0.02529497 0.0007598487 0.01950916 0.0005936236
## 33     33 0.03275751 0.18206275 0.02529869 0.0007721521 0.02074739 0.0006092603
## 34     34 0.03281716 0.17905227 0.02537900 0.0007640757 0.01810062 0.0006324991
## 35     35 0.03277224 0.18126512 0.02532581 0.0007696378 0.02026166 0.0006159179
## 36     36 0.03277967 0.18072391 0.02534971 0.0007772105 0.01945689 0.0005717192
## 37     37 0.03276149 0.18203072 0.02528329 0.0007559306 0.01799419 0.0006032075
## 38     38 0.03272513 0.18377459 0.02528503 0.0007688132 0.01902732 0.0006127174
## 39     39 0.03275369 0.18233881 0.02534653 0.0007590034 0.02089033 0.0005952164
## 40     40 0.03279530 0.18038476 0.02536128 0.0007477546 0.01826648 0.0005568410
## 41     41 0.03274199 0.18284460 0.02532289 0.0007728725 0.01926670 0.0006113832
## 42     42 0.03274882 0.18287806 0.02534019 0.0007832935 0.01926607 0.0006007436
## 43     43 0.03268881 0.18507042 0.02529678 0.0007869534 0.01895837 0.0006094646
## 44     44 0.03272616 0.18403239 0.02532161 0.0007862788 0.02007694 0.0006158943
## 45     45 0.03270987 0.18478282 0.02530714 0.0008094715 0.02062010 0.0006284692
## 46     46 0.03269297 0.18563802 0.02528636 0.0007819934 0.01976476 0.0006146125
## 47     47 0.03269100 0.18585816 0.02529231 0.0007841223 0.02078207 0.0006221532
## 48     48 0.03266589 0.18705427 0.02527464 0.0007731584 0.02069157 0.0006127158
## 49     49 0.03264800 0.18796071 0.02525149 0.0007839952 0.02194731 0.0006314512
## 50     50 0.03264159 0.18832484 0.02523095 0.0007536182 0.02209447 0.0006088194
## 51     51 0.03261166 0.18984262 0.02519236 0.0007233352 0.02282395 0.0005662149
## 52     52 0.03262634 0.18879036 0.02521499 0.0007468191 0.02291659 0.0006093385
## 53     53 0.03261722 0.18954526 0.02521700 0.0007572466 0.02261241 0.0006094490
## 54     54 0.03260484 0.19020910 0.02520390 0.0007749114 0.02304499 0.0006273369
## 55     55 0.03268563 0.18609832 0.02531057 0.0007768144 0.02292621 0.0006591156
## 56     56 0.03263734 0.18869120 0.02523485 0.0007436321 0.02195299 0.0006129892
## 57     57 0.03260563 0.19019966 0.02519422 0.0007786817 0.02276605 0.0006330119
## 58     58 0.03262279 0.18912175 0.02522090 0.0007623225 0.02359935 0.0006193823
## 59     59 0.03261659 0.18983970 0.02519206 0.0007642888 0.02318823 0.0006217051
## 60     60 0.03261815 0.18947035 0.02520524 0.0007910854 0.02435107 0.0006697634
## 61     61 0.03263753 0.18860109 0.02522428 0.0007681604 0.02250355 0.0006063660
## 62     62 0.03259303 0.19089492 0.02516661 0.0007900837 0.02252547 0.0006416166
## 63     63 0.03260712 0.19006653 0.02520203 0.0007652984 0.02062956 0.0006443995
## 64     64 0.03256364 0.19234287 0.02515018 0.0007856301 0.02318622 0.0006364208
## 65     65 0.03259881 0.19048670 0.02519839 0.0007783919 0.02546733 0.0006558452
## 66     66 0.03256486 0.19190649 0.02518116 0.0007790796 0.02326214 0.0006294067
## 67     67 0.03258032 0.19153039 0.02516757 0.0007223986 0.01975966 0.0005859326
## 68     68 0.03259155 0.19093304 0.02518827 0.0007518598 0.02226614 0.0005874343
## 69     69 0.03252479 0.19424156 0.02512988 0.0007714158 0.02294470 0.0006242369
## 70     70 0.03251497 0.19470830 0.02513533 0.0007730129 0.02352078 0.0006247728
## 71     71 0.03253124 0.19358899 0.02515974 0.0007794250 0.02395276 0.0006210460
## 72     72 0.03250892 0.19502944 0.02512512 0.0007879031 0.02374897 0.0006199484
## 73     73 0.03253162 0.19354765 0.02515807 0.0007759306 0.02378695 0.0006109187
## 74     74 0.03250952 0.19505437 0.02513277 0.0007692197 0.02352432 0.0006102482
## 75     75 0.03254705 0.19325111 0.02518909 0.0007605011 0.02200360 0.0006287970
## 76     76 0.03250992 0.19510979 0.02513718 0.0007715749 0.02407926 0.0006198025
## 77     77 0.03252045 0.19450464 0.02515398 0.0007826862 0.02471267 0.0006313894
## 78     78 0.03255342 0.19293584 0.02518034 0.0008369407 0.02684396 0.0006860392
## 79     79 0.03249980 0.19559464 0.02513026 0.0007885672 0.02369816 0.0006374617
## 80     80 0.03251423 0.19458928 0.02516449 0.0007694079 0.02309650 0.0006266216
## 81     81 0.03256069 0.19157312 0.02521780 0.0007191655 0.01805450 0.0006124557
## 82     82 0.03249658 0.19570897 0.02514157 0.0007900282 0.02378043 0.0006571477
## 83     83 0.03246234 0.19683739 0.02511635 0.0007950765 0.02225026 0.0006504797
## 84     84 0.03249505 0.19589221 0.02513827 0.0007920136 0.02344032 0.0006532950
## 85     85 0.03251394 0.19496836 0.02515540 0.0007616938 0.02181309 0.0006304042
## 86     86 0.03254647 0.19342722 0.02520713 0.0007745715 0.02335332 0.0006141731
## 87     87 0.03247319 0.19693759 0.02513056 0.0007750023 0.02379821 0.0006407801
## 88     88 0.03246894 0.19713148 0.02512504 0.0007738594 0.02377237 0.0006456344
## 89     89 0.03253488 0.19400802 0.02519850 0.0007594610 0.02326180 0.0006076405
## 90     90 0.03249498 0.19591099 0.02514641 0.0007754728 0.02364027 0.0006558339
## 91     91 0.03247165 0.19708338 0.02512616 0.0007754172 0.02394506 0.0006477233
## 92     92 0.03241849 0.19905414 0.02509471 0.0007631796 0.02195452 0.0006390964
## 93     93 0.03246395 0.19745474 0.02512683 0.0007698467 0.02471194 0.0006544658
## 94     94 0.03248771 0.19632375 0.02514645 0.0007678721 0.02450056 0.0006566466
## 95     95 0.03251857 0.19436863 0.02521454 0.0007758129 0.02538281 0.0006053793
## 96     96 0.03244968 0.19814110 0.02510522 0.0007729884 0.02491682 0.0006505106
## 97     97 0.03244112 0.19852623 0.02509347 0.0007731527 0.02508986 0.0006539841
## 98     98 0.03244221 0.19852521 0.02509709 0.0007768580 0.02539823 0.0006598683
## 99     99 0.03245850 0.19754605 0.02510645 0.0008006886 0.02667332 0.0006781497
## 100   100 0.03242578 0.19921787 0.02508321 0.0007759773 0.02504508 0.0006616697
## 101   101 0.03241853 0.19954239 0.02507774 0.0007758078 0.02518189 0.0006631844
## 102   102 0.03241591 0.19965453 0.02507708 0.0007729732 0.02517536 0.0006635895
## 103   103 0.03240460 0.19995127 0.02509631 0.0007741166 0.02528635 0.0006606995
## 104   104 0.03239982 0.20040189 0.02506789 0.0007649861 0.02520672 0.0006598198
## 105   105 0.03240117 0.20034924 0.02507373 0.0007630631 0.02543303 0.0006598189
## 106   106 0.03241747 0.19955917 0.02510239 0.0007518640 0.02478953 0.0006274008
## 107   107 0.03240315 0.20024726 0.02507623 0.0007630758 0.02517725 0.0006575062
## 108   108 0.03240294 0.20026608 0.02507538 0.0007644001 0.02500794 0.0006602883
## 109   109 0.03245428 0.19768866 0.02510481 0.0007482636 0.02745015 0.0006638287
## 110   110 0.03237410 0.20125291 0.02507886 0.0007597344 0.02333334 0.0006393645
## 111   111 0.03239716 0.20053929 0.02507084 0.0007686129 0.02477107 0.0006616613
## 112   112 0.03239876 0.20046701 0.02507007 0.0007747777 0.02492520 0.0006654006
## 113   113 0.03240335 0.20004254 0.02507714 0.0008094196 0.02616571 0.0006473598
## 114   114 0.03240234 0.20031797 0.02507529 0.0007735815 0.02505888 0.0006645252
## 115   115 0.03239845 0.20049766 0.02507418 0.0007672455 0.02488664 0.0006612124
## 116   116 0.03243778 0.19848542 0.02510502 0.0007390359 0.02488888 0.0006379899
## 117   117 0.03241519 0.19968706 0.02509255 0.0007712642 0.02483248 0.0006749391
## 118   118 0.03239806 0.20047124 0.02507719 0.0007717110 0.02495168 0.0006550911
## 119   119 0.03237917 0.20123192 0.02507658 0.0007823009 0.02485415 0.0006572140
## 120   120 0.03240964 0.19994448 0.02506974 0.0007528330 0.02472773 0.0006328649
## 121   121 0.03239122 0.20083773 0.02504770 0.0007517740 0.02500886 0.0006213507
## 122   122 0.03239859 0.20049254 0.02507250 0.0007697507 0.02495424 0.0006582021
## 123   123 0.03240069 0.20040237 0.02507444 0.0007664690 0.02492546 0.0006548281
## 124   124 0.03241151 0.19987427 0.02507401 0.0007632399 0.02526600 0.0006553041
## 125   125 0.03240377 0.20026253 0.02507517 0.0007683187 0.02501472 0.0006547100
## 126   126 0.03240443 0.20023559 0.02507431 0.0007652942 0.02497196 0.0006546665
## 127   127 0.03241569 0.19969256 0.02507426 0.0007477748 0.02388997 0.0006536403
## 128   128 0.03242435 0.19918485 0.02507819 0.0007576491 0.02575903 0.0006528819
## 129   129 0.03239010 0.20086169 0.02507736 0.0007694344 0.02488871 0.0006540729
## 130   130 0.03242529 0.19915855 0.02506664 0.0007129767 0.02515401 0.0006436195
## 131   131 0.03240629 0.20003360 0.02506883 0.0007578965 0.02515077 0.0006404516
## 132   132 0.03240423 0.20018304 0.02508393 0.0007626164 0.02470996 0.0006432310
## 133   133 0.03240670 0.20008861 0.02507276 0.0007664146 0.02500670 0.0006449905
## 134   134 0.03241419 0.19978348 0.02508616 0.0007606539 0.02489179 0.0006421327
## 135   135 0.03241892 0.19952758 0.02508675 0.0007632568 0.02486841 0.0006372161
## 136   136 0.03240766 0.20008603 0.02507608 0.0007652442 0.02523859 0.0006522533
## 137   137 0.03240520 0.20019252 0.02507403 0.0007642818 0.02511679 0.0006516122
## 138   138 0.03240803 0.20006564 0.02507649 0.0007645271 0.02506156 0.0006530139
## [1] "Best Model"
##     nvmax
## 110   110

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.0965368703  2.095697e+00  2.097377e+00
## PC1         -0.0009753762 -1.154814e-03 -7.959383e-04
## PC2          0.0012035314  1.005546e-03  1.401517e-03
## PC3         -0.0002767035 -4.948937e-04 -5.851338e-05
## PC4          0.0001683756 -5.192600e-05  3.886772e-04
## PC5          0.0006826866  4.583826e-04  9.069907e-04
## PC7         -0.0005099621 -7.392059e-04 -2.807183e-04
## PC8         -0.0002067595 -4.352876e-04  2.176856e-05
## PC9         -0.0002314497 -4.702156e-04  7.316330e-06
## PC10         0.0002611245  1.863521e-05  5.036139e-04
## PC11        -0.0013627613 -1.610508e-03 -1.115014e-03
## PC12         0.0003546051  1.072602e-04  6.019500e-04
## PC13         0.0004167756  1.611092e-04  6.724419e-04
## PC14         0.0015001118  1.246140e-03  1.754083e-03
## PC15        -0.0003584491 -6.189019e-04 -9.799628e-05
## PC16         0.0007904896  5.243205e-04  1.056659e-03
## PC17        -0.0001736328 -4.484116e-04  1.011461e-04
## PC18         0.0004064056  1.241334e-04  6.886777e-04
## PC19        -0.0003290534 -6.193746e-04 -3.873210e-05
## PC20         0.0009086458  6.043066e-04  1.212985e-03
## PC21        -0.0010243370 -1.335834e-03 -7.128398e-04
## PC22         0.0040105844  3.685930e-03  4.335238e-03
## PC23        -0.0005271298 -1.179198e-03  1.249380e-04
## PC24        -0.0014519611 -2.177307e-03 -7.266149e-04
## PC25         0.0009727363  2.537141e-04  1.691759e-03
## PC26        -0.0004758081 -1.200151e-03  2.485353e-04
## PC27        -0.0003703819 -1.098387e-03  3.576233e-04
## PC28         0.0007845835  5.403101e-05  1.515136e-03
## PC29        -0.0007201114 -1.451054e-03  1.083162e-05
## PC30         0.0006503129 -8.115961e-05  1.381786e-03
## PC31        -0.0003568879 -1.091877e-03  3.781014e-04
## PC32         0.0013278584  5.974717e-04  2.058245e-03
## PC36         0.0005845222 -1.552992e-04  1.324344e-03
## PC37        -0.0003920529 -1.130908e-03  3.468022e-04
## PC38         0.0004271293 -3.206515e-04  1.174910e-03
## PC39         0.0003401916 -4.101892e-04  1.090572e-03
## PC40         0.0003206294 -4.231597e-04  1.064418e-03
## PC41        -0.0007005179 -1.441132e-03  4.009582e-05
## PC42        -0.0005636011 -1.309979e-03  1.827765e-04
## PC43         0.0007001334 -5.084190e-05  1.451109e-03
## PC44        -0.0005123994 -1.259461e-03  2.346622e-04
## PC45         0.0012269099  4.783201e-04  1.975500e-03
## PC46         0.0015047325  7.527743e-04  2.256691e-03
## PC47         0.0005506639 -2.045439e-04  1.305872e-03
## PC50        -0.0008826750 -1.638728e-03 -1.266224e-04
## PC51         0.0005462567 -2.024028e-04  1.294916e-03
## PC52         0.0004098906 -3.434558e-04  1.163237e-03
## PC53        -0.0005576225 -1.319556e-03  2.043114e-04
## PC57         0.0004296186 -3.321397e-04  1.191377e-03
## PC58        -0.0005527647 -1.318095e-03  2.125653e-04
## PC59         0.0012702222  5.016661e-04  2.038778e-03
## PC60        -0.0008017669 -1.572844e-03 -3.068976e-05
## PC61        -0.0002846938 -1.062817e-03  4.934299e-04
## PC62        -0.0005640088 -1.331659e-03  2.036412e-04
## PC63         0.0006734582 -1.007575e-04  1.447674e-03
## PC65         0.0008926788  1.148954e-04  1.670462e-03
## PC67         0.0004050070 -3.737709e-04  1.183785e-03
## PC68         0.0008858627  1.080092e-04  1.663716e-03
## PC69        -0.0012529641 -2.032539e-03 -4.733894e-04
## PC71        -0.0008268591 -1.608376e-03 -4.534231e-05
## PC72         0.0008356291  5.699689e-05  1.614261e-03
## PC73         0.0005921479 -1.864336e-04  1.370729e-03
## PC75         0.0002538745 -5.370266e-04  1.044776e-03
## PC76         0.0016080450  8.253431e-04  2.390747e-03
## PC78        -0.0003849174 -1.174979e-03  4.051447e-04
## PC79         0.0008139782  2.750905e-05  1.600447e-03
## PC80        -0.0003822184 -1.170784e-03  4.063477e-04
## PC81        -0.0005777761 -1.373533e-03  2.179804e-04
## PC82        -0.0011160961 -1.905057e-03 -3.271354e-04
## PC83         0.0011666680  3.769274e-04  1.956409e-03
## PC85         0.0004159382 -3.784671e-04  1.210344e-03
## PC86         0.0005542520 -2.483770e-04  1.356881e-03
## PC87         0.0005841709 -2.138372e-04  1.382179e-03
## PC88         0.0004275564 -3.716779e-04  1.226791e-03
## PC89         0.0002772053 -5.162544e-04  1.070665e-03
## PC90        -0.0023723844 -3.170924e-03 -1.573844e-03
## PC91         0.0003996476 -3.990746e-04  1.198370e-03
## PC92         0.0002769611 -5.261904e-04  1.080113e-03
## PC94         0.0005395375 -2.654395e-04  1.344514e-03
## PC96        -0.0008826530 -1.691488e-03 -7.381836e-05
## PC97        -0.0003003734 -1.108344e-03  5.075967e-04
## PC98         0.0005875189 -2.247114e-04  1.399749e-03
## PC101       -0.0009914517 -1.799574e-03 -1.833293e-04
## PC102        0.0014484484  6.327960e-04  2.264101e-03
## PC104        0.0006386730 -1.761698e-04  1.453516e-03
## PC105        0.0002752159 -5.431010e-04  1.093533e-03
## PC106        0.0009514398  1.319122e-04  1.770967e-03
## PC107        0.0003922184 -4.250027e-04  1.209439e-03
## PC108       -0.0012110732 -2.030131e-03 -3.920157e-04
## PC109       -0.0003256071 -1.145878e-03  4.946643e-04
## PC110       -0.0004121828 -1.231737e-03  4.073715e-04
## PC111       -0.0008118751 -1.633126e-03  9.376251e-06
## PC112        0.0009103789  8.847136e-05  1.732287e-03
## PC114       -0.0003074862 -1.133391e-03  5.184185e-04
## PC115       -0.0006585795 -1.481574e-03  1.644146e-04
## PC117       -0.0016856941 -2.512477e-03 -8.589110e-04
## PC118       -0.0009465493 -1.775062e-03 -1.180364e-04
## PC119       -0.0008246670 -1.659911e-03  1.057664e-05
## PC120       -0.0005478879 -1.379798e-03  2.840228e-04
## PC121        0.0004080198 -4.288019e-04  1.244842e-03
## PC123       -0.0007145140 -1.551468e-03  1.224401e-04
## PC126        0.0010369835  1.990034e-04  1.874964e-03
## PC127        0.0010668332  2.360861e-04  1.897580e-03
## PC128       -0.0004767245 -1.320809e-03  3.673603e-04
## PC129       -0.0007336509 -1.574362e-03  1.070599e-04
## PC131        0.0011602279  3.167657e-04  2.003690e-03
## PC132        0.0015102884  6.702975e-04  2.350279e-03
## PC133       -0.0004892188 -1.335368e-03  3.569302e-04
## PC134       -0.0002865181 -1.131822e-03  5.587861e-04
## PC136        0.0006394666 -2.115833e-04  1.490516e-03
## PC138        0.0011348902  2.795909e-04  1.990190e-03

Test

# Evaluate the stepwise-selected (leapSeq) model on the held-out test set.
# Predictions are back-transformed via `transformation = t` before the MSE
# is reported.
# NOTE(review): `t` here shadows base R's transpose function t(); it is
# presumably the output-variable transformation (log, per the setup above)
# defined earlier in the document — confirm and consider a clearer name.
if (algo.stepwise.caret) {
  test.model(model.stepwise, data.test,
             method = "leapSeq", subopt = NULL,
             formula = formula, feature.names = feature.names,
             label.names = label.names,
             id = id,
             draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.033   2.086   2.098   2.097   2.110   2.159 
## [1] "leapSeq  Test MSE: 0.00103301010705247"

LASSO with CV

Train

# Train a LASSO regression via caret + glmnet with cross-validation,
# searching lambda over a 100-point log-spaced grid in [1e-4, 1e-2].
# The fitted caret model is stored in `model.LASSO.caret`.
if (algo.LASSO.caret) {
  set.seed(1)  # reproducible CV fold assignment
  tune.grid <- expand.grid(
    alpha  = 1,  # alpha = 1 selects the pure LASSO penalty in glmnet
    # `length.out` spelled in full: the original relied on partial
    # argument matching via `length = 100`
    lambda = 10^seq(from = -4, to = -2, length.out = 100)
  )
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "glmnet",
                                    subopt = "LASSO",
                                    tune.grid = tune.grid,
                                    feature.names = feature.names)
  model.LASSO.caret <- returned$model
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.000145 on full training set
## glmnet 
## 
## 5584 samples
##  138 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE        Rsquared    MAE       
##   0.0001000000  0.03236190  0.20105937  0.02505164
##   0.0001047616  0.03236087  0.20107268  0.02505108
##   0.0001097499  0.03235987  0.20108411  0.02505059
##   0.0001149757  0.03235902  0.20108916  0.02505025
##   0.0001204504  0.03235824  0.20109125  0.02504994
##   0.0001261857  0.03235762  0.20108618  0.02504978
##   0.0001321941  0.03235714  0.20107529  0.02504983
##   0.0001384886  0.03235684  0.20105672  0.02505005
##   0.0001450829  0.03235671  0.20103063  0.02505047
##   0.0001519911  0.03235672  0.20099938  0.02505103
##   0.0001592283  0.03235693  0.20095987  0.02505166
##   0.0001668101  0.03235731  0.20091382  0.02505242
##   0.0001747528  0.03235787  0.20086176  0.02505337
##   0.0001830738  0.03235863  0.20080214  0.02505447
##   0.0001917910  0.03235968  0.20073138  0.02505567
##   0.0002009233  0.03236096  0.20065325  0.02505697
##   0.0002104904  0.03236269  0.20055655  0.02505866
##   0.0002205131  0.03236483  0.20044367  0.02506067
##   0.0002310130  0.03236750  0.20030962  0.02506310
##   0.0002420128  0.03237082  0.20014903  0.02506586
##   0.0002535364  0.03237460  0.19997140  0.02506892
##   0.0002656088  0.03237883  0.19977786  0.02507257
##   0.0002782559  0.03238329  0.19958041  0.02507641
##   0.0002915053  0.03238812  0.19937239  0.02508088
##   0.0003053856  0.03239347  0.19914718  0.02508587
##   0.0003199267  0.03239945  0.19889884  0.02509131
##   0.0003351603  0.03240619  0.19862224  0.02509721
##   0.0003511192  0.03241335  0.19833494  0.02510323
##   0.0003678380  0.03242122  0.19802277  0.02510972
##   0.0003853529  0.03242955  0.19769932  0.02511616
##   0.0004037017  0.03243891  0.19733726  0.02512347
##   0.0004229243  0.03244931  0.19693628  0.02513134
##   0.0004430621  0.03246079  0.19649531  0.02514009
##   0.0004641589  0.03247308  0.19602820  0.02514951
##   0.0004862602  0.03248691  0.19549774  0.02516117
##   0.0005094138  0.03250204  0.19491787  0.02517408
##   0.0005336699  0.03251877  0.19427109  0.02518844
##   0.0005590810  0.03253658  0.19358637  0.02520341
##   0.0005857021  0.03255636  0.19281423  0.02522000
##   0.0006135907  0.03257792  0.19196344  0.02523807
##   0.0006428073  0.03260141  0.19102673  0.02525775
##   0.0006734151  0.03262631  0.19003400  0.02527958
##   0.0007054802  0.03265334  0.18894158  0.02530313
##   0.0007390722  0.03268001  0.18788934  0.02532709
##   0.0007742637  0.03270867  0.18674610  0.02535227
##   0.0008111308  0.03273750  0.18561151  0.02537684
##   0.0008497534  0.03276797  0.18440397  0.02540308
##   0.0008902151  0.03279810  0.18323798  0.02542870
##   0.0009326033  0.03282889  0.18206251  0.02545510
##   0.0009770100  0.03285878  0.18096998  0.02547978
##   0.0010235310  0.03289140  0.17974716  0.02550598
##   0.0010722672  0.03292430  0.17854335  0.02553380
##   0.0011233240  0.03296029  0.17717564  0.02556356
##   0.0011768120  0.03299728  0.17576951  0.02559499
##   0.0012328467  0.03303672  0.17423923  0.02562847
##   0.0012915497  0.03307535  0.17278254  0.02566136
##   0.0013530478  0.03311624  0.17121533  0.02569607
##   0.0014174742  0.03315617  0.16973972  0.02572815
##   0.0014849683  0.03319890  0.16812391  0.02576203
##   0.0015556761  0.03324158  0.16654687  0.02579542
##   0.0016297508  0.03328685  0.16484952  0.02583098
##   0.0017073526  0.03333220  0.16320084  0.02586724
##   0.0017886495  0.03338000  0.16144753  0.02590568
##   0.0018738174  0.03342712  0.15978295  0.02594193
##   0.0019630407  0.03347856  0.15788606  0.02598108
##   0.0020565123  0.03353399  0.15575811  0.02602244
##   0.0021544347  0.03359369  0.15337352  0.02606660
##   0.0022570197  0.03365683  0.15076893  0.02611286
##   0.0023644894  0.03372304  0.14797064  0.02616167
##   0.0024770764  0.03378617  0.14542029  0.02620831
##   0.0025950242  0.03384906  0.14294419  0.02625508
##   0.0027185882  0.03390323  0.14121072  0.02629583
##   0.0028480359  0.03395991  0.13937395  0.02633826
##   0.0029836472  0.03401841  0.13750608  0.02638257
##   0.0031257158  0.03408236  0.13530288  0.02643130
##   0.0032745492  0.03415226  0.13269500  0.02648415
##   0.0034304693  0.03422882  0.12957397  0.02654150
##   0.0035938137  0.03431265  0.12582703  0.02660394
##   0.0037649358  0.03440441  0.12131819  0.02667220
##   0.0039442061  0.03450485  0.11588703  0.02674552
##   0.0041320124  0.03461439  0.10938613  0.02682428
##   0.0043287613  0.03473277  0.10172545  0.02691068
##   0.0045348785  0.03485016  0.09386179  0.02699699
##   0.0047508102  0.03494206  0.08810816  0.02706372
##   0.0049770236  0.03502160  0.08351759  0.02712016
##   0.0052140083  0.03506365  0.08268736  0.02714938
##   0.0054622772  0.03510491  0.08228434  0.02717880
##   0.0057223677  0.03514622  0.08228409  0.02720772
##   0.0059948425  0.03519151  0.08228409  0.02723899
##   0.0062802914  0.03524115  0.08228409  0.02727347
##   0.0065793322  0.03529556  0.08228409  0.02731186
##   0.0068926121  0.03535518  0.08228409  0.02735411
##   0.0072208090  0.03542051  0.08228409  0.02740023
##   0.0075646333  0.03549207  0.08228409  0.02745106
##   0.0079248290  0.03557046  0.08228409  0.02750742
##   0.0083021757  0.03565629  0.08228409  0.02756928
##   0.0086974900  0.03575027  0.08228409  0.02763929
##   0.0091116276  0.03585313  0.08228409  0.02771663
##   0.0095454846  0.03596570  0.08228409  0.02780066
##   0.0100000000  0.03608884  0.08228409  0.02789191
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.0001450829.

##   alpha       lambda
## 9     1 0.0001450829
##     alpha       lambda       RMSE   Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.0001000000 0.03236190 0.20105937 0.02505164 0.0007599981 0.02418576 0.0006347056
## 2       1 0.0001047616 0.03236087 0.20107268 0.02505108 0.0007597587 0.02414405 0.0006337053
## 3       1 0.0001097499 0.03235987 0.20108411 0.02505059 0.0007594801 0.02410002 0.0006326823
## 4       1 0.0001149757 0.03235902 0.20108916 0.02505025 0.0007591934 0.02405581 0.0006316629
## 5       1 0.0001204504 0.03235824 0.20109125 0.02504994 0.0007588221 0.02400944 0.0006306606
## 6       1 0.0001261857 0.03235762 0.20108618 0.02504978 0.0007584236 0.02396174 0.0006296174
## 7       1 0.0001321941 0.03235714 0.20107529 0.02504983 0.0007579951 0.02391067 0.0006282957
## 8       1 0.0001384886 0.03235684 0.20105672 0.02505005 0.0007575193 0.02385592 0.0006268758
## 9       1 0.0001450829 0.03235671 0.20103063 0.02505047 0.0007570231 0.02379725 0.0006254009
## 10      1 0.0001519911 0.03235672 0.20099938 0.02505103 0.0007565016 0.02373705 0.0006238238
## 11      1 0.0001592283 0.03235693 0.20095987 0.02505166 0.0007559951 0.02367192 0.0006221452
## 12      1 0.0001668101 0.03235731 0.20091382 0.02505242 0.0007554487 0.02359960 0.0006203742
## 13      1 0.0001747528 0.03235787 0.20086176 0.02505337 0.0007547587 0.02352493 0.0006185562
## 14      1 0.0001830738 0.03235863 0.20080214 0.02505447 0.0007540248 0.02344416 0.0006163596
## 15      1 0.0001917910 0.03235968 0.20073138 0.02505567 0.0007532908 0.02335947 0.0006139964
## 16      1 0.0002009233 0.03236096 0.20065325 0.02505697 0.0007524909 0.02326688 0.0006114605
## 17      1 0.0002104904 0.03236269 0.20055655 0.02505866 0.0007518047 0.02316899 0.0006087151
## 18      1 0.0002205131 0.03236483 0.20044367 0.02506067 0.0007510809 0.02306587 0.0006059799
## 19      1 0.0002310130 0.03236750 0.20030962 0.02506310 0.0007505097 0.02295739 0.0006030041
## 20      1 0.0002420128 0.03237082 0.20014903 0.02506586 0.0007500482 0.02285112 0.0005999298
## 21      1 0.0002535364 0.03237460 0.19997140 0.02506892 0.0007494675 0.02273865 0.0005967893
## 22      1 0.0002656088 0.03237883 0.19977786 0.02507257 0.0007488625 0.02260706 0.0005940851
## 23      1 0.0002782559 0.03238329 0.19958041 0.02507641 0.0007475076 0.02247005 0.0005910390
## 24      1 0.0002915053 0.03238812 0.19937239 0.02508088 0.0007462742 0.02232794 0.0005880350
## 25      1 0.0003053856 0.03239347 0.19914718 0.02508587 0.0007448160 0.02217993 0.0005846457
## 26      1 0.0003199267 0.03239945 0.19889884 0.02509131 0.0007434012 0.02202899 0.0005814037
## 27      1 0.0003351603 0.03240619 0.19862224 0.02509721 0.0007416965 0.02187253 0.0005779455
## 28      1 0.0003511192 0.03241335 0.19833494 0.02510323 0.0007399613 0.02169278 0.0005744786
## 29      1 0.0003678380 0.03242122 0.19802277 0.02510972 0.0007378196 0.02150320 0.0005701840
## 30      1 0.0003853529 0.03242955 0.19769932 0.02511616 0.0007357252 0.02129322 0.0005658572
## 31      1 0.0004037017 0.03243891 0.19733726 0.02512347 0.0007331974 0.02106920 0.0005611342
## 32      1 0.0004229243 0.03244931 0.19693628 0.02513134 0.0007308006 0.02084388 0.0005561445
## 33      1 0.0004430621 0.03246079 0.19649531 0.02514009 0.0007276888 0.02061046 0.0005510806
## 34      1 0.0004641589 0.03247308 0.19602820 0.02514951 0.0007246315 0.02037584 0.0005462623
## 35      1 0.0004862602 0.03248691 0.19549774 0.02516117 0.0007216637 0.02014482 0.0005414703
## 36      1 0.0005094138 0.03250204 0.19491787 0.02517408 0.0007190359 0.01994399 0.0005370409
## 37      1 0.0005336699 0.03251877 0.19427109 0.02518844 0.0007165231 0.01974554 0.0005321195
## 38      1 0.0005590810 0.03253658 0.19358637 0.02520341 0.0007145125 0.01954877 0.0005277197
## 39      1 0.0005857021 0.03255636 0.19281423 0.02522000 0.0007124107 0.01934101 0.0005234516
## 40      1 0.0006135907 0.03257792 0.19196344 0.02523807 0.0007107217 0.01911139 0.0005196218
## 41      1 0.0006428073 0.03260141 0.19102673 0.02525775 0.0007084506 0.01888449 0.0005141496
## 42      1 0.0006734151 0.03262631 0.19003400 0.02527958 0.0007063343 0.01868250 0.0005084231
## 43      1 0.0007054802 0.03265334 0.18894158 0.02530313 0.0007039939 0.01849979 0.0005016792
## 44      1 0.0007390722 0.03268001 0.18788934 0.02532709 0.0007023441 0.01830260 0.0004953880
## 45      1 0.0007742637 0.03270867 0.18674610 0.02535227 0.0006994833 0.01813962 0.0004879891
## 46      1 0.0008111308 0.03273750 0.18561151 0.02537684 0.0006975315 0.01793709 0.0004821234
## 47      1 0.0008497534 0.03276797 0.18440397 0.02540308 0.0006947957 0.01778303 0.0004770644
## 48      1 0.0008902151 0.03279810 0.18323798 0.02542870 0.0006926994 0.01767524 0.0004731975
## 49      1 0.0009326033 0.03282889 0.18206251 0.02545510 0.0006864006 0.01763383 0.0004671987
## 50      1 0.0009770100 0.03285878 0.18096998 0.02547978 0.0006807591 0.01756922 0.0004617207
## 51      1 0.0010235310 0.03289140 0.17974716 0.02550598 0.0006752897 0.01754158 0.0004557867
## 52      1 0.0010722672 0.03292430 0.17854335 0.02553380 0.0006704859 0.01754072 0.0004505885
## 53      1 0.0011233240 0.03296029 0.17717564 0.02556356 0.0006655448 0.01755728 0.0004458497
## 54      1 0.0011768120 0.03299728 0.17576951 0.02559499 0.0006611257 0.01754051 0.0004418841
## 55      1 0.0012328467 0.03303672 0.17423923 0.02562847 0.0006572321 0.01761480 0.0004381028
## 56      1 0.0012915497 0.03307535 0.17278254 0.02566136 0.0006550717 0.01772233 0.0004347874
## 57      1 0.0013530478 0.03311624 0.17121533 0.02569607 0.0006525182 0.01792433 0.0004319126
## 58      1 0.0014174742 0.03315617 0.16973972 0.02572815 0.0006519709 0.01810061 0.0004306213
## 59      1 0.0014849683 0.03319890 0.16812391 0.02576203 0.0006514785 0.01833071 0.0004284061
## 60      1 0.0015556761 0.03324158 0.16654687 0.02579542 0.0006521901 0.01844395 0.0004261198
## 61      1 0.0016297508 0.03328685 0.16484952 0.02583098 0.0006525437 0.01860566 0.0004231999
## 62      1 0.0017073526 0.03333220 0.16320084 0.02586724 0.0006541798 0.01873564 0.0004194866
## 63      1 0.0017886495 0.03338000 0.16144753 0.02590568 0.0006559242 0.01896662 0.0004164559
## 64      1 0.0018738174 0.03342712 0.15978295 0.02594193 0.0006587158 0.01915624 0.0004130941
## 65      1 0.0019630407 0.03347856 0.15788606 0.02598108 0.0006622427 0.01938369 0.0004106596
## 66      1 0.0020565123 0.03353399 0.15575811 0.02602244 0.0006666872 0.01963081 0.0004090294
## 67      1 0.0021544347 0.03359369 0.15337352 0.02606660 0.0006685037 0.01996397 0.0004060412
## 68      1 0.0022570197 0.03365683 0.15076893 0.02611286 0.0006701500 0.02012742 0.0004031049
## 69      1 0.0023644894 0.03372304 0.14797064 0.02616167 0.0006699253 0.02038046 0.0003989019
## 70      1 0.0024770764 0.03378617 0.14542029 0.02620831 0.0006712049 0.02028054 0.0003962720
## 71      1 0.0025950242 0.03384906 0.14294419 0.02625508 0.0006696809 0.02042607 0.0003933469
## 72      1 0.0027185882 0.03390323 0.14121072 0.02629583 0.0006718838 0.02019410 0.0003908570
## 73      1 0.0028480359 0.03395991 0.13937395 0.02633826 0.0006714922 0.02007112 0.0003866584
## 74      1 0.0029836472 0.03401841 0.13750608 0.02638257 0.0006717751 0.01996412 0.0003836987
## 75      1 0.0031257158 0.03408236 0.13530288 0.02643130 0.0006722409 0.01984422 0.0003804000
## 76      1 0.0032745492 0.03415226 0.13269500 0.02648415 0.0006729035 0.01972421 0.0003768266
## 77      1 0.0034304693 0.03422882 0.12957397 0.02654150 0.0006737146 0.01959511 0.0003740678
## 78      1 0.0035938137 0.03431265 0.12582703 0.02660394 0.0006746941 0.01945866 0.0003707322
## 79      1 0.0037649358 0.03440441 0.12131819 0.02667220 0.0006758644 0.01931746 0.0003665531
## 80      1 0.0039442061 0.03450485 0.11588703 0.02674552 0.0006772510 0.01917426 0.0003628740
## 81      1 0.0041320124 0.03461439 0.10938613 0.02682428 0.0006788954 0.01904253 0.0003598815
## 82      1 0.0043287613 0.03473277 0.10172545 0.02691068 0.0006812282 0.01881231 0.0003564423
## 83      1 0.0045348785 0.03485016 0.09386179 0.02699699 0.0006826026 0.01919357 0.0003548354
## 84      1 0.0047508102 0.03494206 0.08810816 0.02706372 0.0006881413 0.01794054 0.0003611619
## 85      1 0.0049770236 0.03502160 0.08351759 0.02712016 0.0006841371 0.01834190 0.0003608311
## 86      1 0.0052140083 0.03506365 0.08268736 0.02714938 0.0006933787 0.01766165 0.0003683290
## 87      1 0.0054622772 0.03510491 0.08228434 0.02717880 0.0006980672 0.01755866 0.0003733869
## 88      1 0.0057223677 0.03514622 0.08228409 0.02720772 0.0007035110 0.01755905 0.0003775252
## 89      1 0.0059948425 0.03519151 0.08228409 0.02723899 0.0007092091 0.01755905 0.0003820465
## 90      1 0.0062802914 0.03524115 0.08228409 0.02727347 0.0007151715 0.01755905 0.0003868525
## 91      1 0.0065793322 0.03529556 0.08228409 0.02731186 0.0007214095 0.01755905 0.0003917255
## 92      1 0.0068926121 0.03535518 0.08228409 0.02735411 0.0007279351 0.01755905 0.0003967242
## 93      1 0.0072208090 0.03542051 0.08228409 0.02740023 0.0007347608 0.01755905 0.0004018903
## 94      1 0.0075646333 0.03549207 0.08228409 0.02745106 0.0007418997 0.01755905 0.0004074043
## 95      1 0.0079248290 0.03557046 0.08228409 0.02750742 0.0007493653 0.01755905 0.0004135891
## 96      1 0.0083021757 0.03565629 0.08228409 0.02756928 0.0007571721 0.01755905 0.0004205084
## 97      1 0.0086974900 0.03575027 0.08228409 0.02763929 0.0007653349 0.01755905 0.0004274920
## 98      1 0.0091116276 0.03585313 0.08228409 0.02771663 0.0007738697 0.01755905 0.0004354962
## 99      1 0.0095454846 0.03596570 0.08228409 0.02780066 0.0007827932 0.01755905 0.0004441064
## 100     1 0.0100000000 0.03608884 0.08228409 0.02789191 0.0007921235 0.01755905 0.0004534858

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##                model.coef
## (Intercept)  2.096520e+00
## PC1         -9.434734e-04
## PC2          1.163513e-03
## PC3         -2.405772e-04
## PC4          1.273234e-04
## PC5          6.383566e-04
## PC6          3.112195e-05
## PC7         -4.718467e-04
## PC8         -1.663756e-04
## PC9         -1.889095e-04
## PC10         2.190858e-04
## PC11        -1.321923e-03
## PC12         3.080031e-04
## PC13         3.717534e-04
## PC14         1.454215e-03
## PC15        -3.176498e-04
## PC16         7.437512e-04
## PC17        -1.221111e-04
## PC18         3.542595e-04
## PC19        -2.776758e-04
## PC20         8.607844e-04
## PC21        -9.666859e-04
## PC22         3.952461e-03
## PC23        -4.166956e-04
## PC24        -1.331638e-03
## PC25         8.622455e-04
## PC26        -3.465631e-04
## PC27        -2.514303e-04
## PC28         6.568033e-04
## PC29        -6.027372e-04
## PC30         5.293043e-04
## PC31        -2.300414e-04
## PC32         1.197365e-03
## PC33         1.609246e-05
## PC36         4.581326e-04
## PC37        -2.499862e-04
## PC38         3.020890e-04
## PC39         2.102455e-04
## PC40         1.794375e-04
## PC41        -5.773453e-04
## PC42        -4.306599e-04
## PC43         5.851307e-04
## PC44        -3.861284e-04
## PC45         1.097755e-03
## PC46         1.381582e-03
## PC47         4.254507e-04
## PC49         6.504350e-05
## PC50        -7.591232e-04
## PC51         4.131483e-04
## PC52         2.715325e-04
## PC53        -4.125363e-04
## PC55         2.562778e-05
## PC57         2.963589e-04
## PC58        -4.270153e-04
## PC59         1.139268e-03
## PC60        -6.675211e-04
## PC61        -1.468285e-04
## PC62        -4.333352e-04
## PC63         5.431812e-04
## PC64         6.411887e-05
## PC65         7.599744e-04
## PC66         4.984856e-05
## PC67         2.625191e-04
## PC68         7.676511e-04
## PC69        -1.125275e-03
## PC71        -6.910116e-04
## PC72         7.123027e-04
## PC73         4.641420e-04
## PC75         1.261676e-04
## PC76         1.467970e-03
## PC78        -2.470587e-04
## PC79         6.842701e-04
## PC80        -2.375074e-04
## PC81        -4.368029e-04
## PC82        -9.842284e-04
## PC83         1.049882e-03
## PC85         2.695494e-04
## PC86         4.221395e-04
## PC87         4.448277e-04
## PC88         2.953262e-04
## PC89         1.261681e-04
## PC90        -2.230788e-03
## PC91         2.686513e-04
## PC92         1.551006e-04
## PC93        -2.914432e-05
## PC94         4.116545e-04
## PC95         1.146995e-04
## PC96        -7.456882e-04
## PC97        -1.571455e-04
## PC98         4.396447e-04
## PC101       -8.547762e-04
## PC102        1.297625e-03
## PC103        1.320225e-05
## PC104        5.103250e-04
## PC105        1.117939e-04
## PC106        8.040508e-04
## PC107        2.590163e-04
## PC108       -1.068187e-03
## PC109       -1.796570e-04
## PC110       -2.752258e-04
## PC111       -6.690987e-04
## PC112        7.819293e-04
## PC114       -1.607113e-04
## PC115       -5.157004e-04
## PC117       -1.544365e-03
## PC118       -8.037471e-04
## PC119       -6.830647e-04
## PC120       -3.994250e-04
## PC121        2.625639e-04
## PC123       -5.934911e-04
## PC126        8.771437e-04
## PC127        9.211607e-04
## PC128       -3.131555e-04
## PC129       -6.028241e-04
## PC131        1.008670e-03
## PC132        1.364081e-03
## PC133       -3.482159e-04
## PC134       -1.411284e-04
## PC136        4.925246e-04
## PC138        9.809952e-04

Test

# Evaluate the tuned LASSO (glmnet) model on the held-out test set.
# Predictions are back-transformed via `transformation = t` before the MSE
# is reported.
# NOTE(review): unlike the stepwise test chunk, no `id` argument is passed
# here — confirm that test.model's default for `id` is acceptable, or that
# the omission is intentional.
if (algo.LASSO.caret) {
  test.model(model.LASSO.caret, data.test,
             method = "glmnet", subopt = "LASSO",
             formula = formula, feature.names = feature.names,
             label.names = label.names,
             draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.037   2.086   2.098   2.097   2.109   2.154 
## [1] "glmnet LASSO Test MSE: 0.00102589494768998"

LARS with CV

Train

# Train a Least Angle Regression (LARS) model via caret with
# cross-validation over caret's default `fraction` tuning grid.
# The fitted caret model is stored in `model.LARS.caret`.
if (algo.LARS.caret) {
  set.seed(1)  # reproducible CV fold assignment
  # Bug fix: the original passed subopt = 'NULL' — the literal string
  # "NULL", not the NULL object. The sibling stepwise chunk passes
  # subopt = NULL, so the string form would mislabel any subopt-derived
  # output (e.g. "lars NULL ..." in printed labels).
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "lars",
                                    subopt = NULL,
                                    feature.names = feature.names)
  model.LARS.caret <- returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.848 on full training set
## Least Angle Regression 
## 
## 5584 samples
##  138 predictor
## 
## Pre-processing: centered (138), scaled (138) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   fraction    RMSE        Rsquared    MAE       
##   0.00000000  0.03617859         NaN  0.02795891
##   0.01010101  0.03583652  0.08228409  0.02770456
##   0.02020202  0.03553677  0.08228409  0.02748340
##   0.03030303  0.03528042  0.08228409  0.02730132
##   0.04040404  0.03506951  0.08236334  0.02715343
##   0.05050505  0.03491192  0.08960292  0.02704117
##   0.06060606  0.03475285  0.10045185  0.02692344
##   0.07070707  0.03459739  0.11057146  0.02681071
##   0.08080808  0.03445064  0.11901463  0.02670491
##   0.09090909  0.03431271  0.12593822  0.02660274
##   0.10101010  0.03418370  0.13154481  0.02650672
##   0.11111111  0.03406372  0.13604287  0.02641613
##   0.12121212  0.03395489  0.13959693  0.02633448
##   0.13131313  0.03386210  0.14245698  0.02626525
##   0.14141414  0.03377608  0.14568772  0.02620197
##   0.15151515  0.03369163  0.14931545  0.02613907
##   0.16161616  0.03360996  0.15274363  0.02607883
##   0.17171717  0.03353216  0.15589374  0.02602156
##   0.18181818  0.03345971  0.15865386  0.02596743
##   0.19191919  0.03339432  0.16099539  0.02591723
##   0.20202020  0.03333573  0.16305384  0.02586990
##   0.21212121  0.03328017  0.16513137  0.02582496
##   0.22222222  0.03323077  0.16698990  0.02578614
##   0.23232323  0.03318498  0.16871462  0.02575066
##   0.24242424  0.03314283  0.17029447  0.02571768
##   0.25252525  0.03310373  0.17175409  0.02568550
##   0.26262626  0.03306739  0.17311945  0.02565402
##   0.27272727  0.03303296  0.17443936  0.02562406
##   0.28282828  0.03300016  0.17572874  0.02559644
##   0.29292929  0.03297050  0.17686835  0.02557202
##   0.30303030  0.03294296  0.17790387  0.02554897
##   0.31313131  0.03291665  0.17888121  0.02552665
##   0.32323232  0.03289223  0.17977739  0.02550648
##   0.33333333  0.03286856  0.18065191  0.02548749
##   0.34343434  0.03284689  0.18144312  0.02547007
##   0.35353535  0.03282641  0.18219490  0.02545282
##   0.36363636  0.03280610  0.18295531  0.02543552
##   0.37373737  0.03278637  0.18370210  0.02541827
##   0.38383838  0.03276658  0.18447601  0.02540150
##   0.39393939  0.03274728  0.18524226  0.02538516
##   0.40404040  0.03272834  0.18599698  0.02536882
##   0.41414141  0.03271014  0.18671707  0.02535287
##   0.42424242  0.03269230  0.18742583  0.02533727
##   0.43434343  0.03267488  0.18811888  0.02532154
##   0.44444444  0.03265732  0.18882764  0.02530588
##   0.45454545  0.03264038  0.18950682  0.02529110
##   0.46464646  0.03262403  0.19016099  0.02527702
##   0.47474747  0.03260805  0.19080251  0.02526330
##   0.48484848  0.03259262  0.19142469  0.02525033
##   0.49494949  0.03257790  0.19201249  0.02523795
##   0.50505051  0.03256399  0.19256197  0.02522640
##   0.51515152  0.03255102  0.19306559  0.02521570
##   0.52525253  0.03253882  0.19353232  0.02520528
##   0.53535354  0.03252713  0.19397885  0.02519519
##   0.54545455  0.03251577  0.19441402  0.02518549
##   0.55555556  0.03250476  0.19483713  0.02517619
##   0.56565657  0.03249417  0.19524482  0.02516728
##   0.57575758  0.03248415  0.19562789  0.02515872
##   0.58585859  0.03247479  0.19598221  0.02515071
##   0.59595960  0.03246614  0.19630629  0.02514381
##   0.60606061  0.03245776  0.19662319  0.02513753
##   0.61616162  0.03244964  0.19693214  0.02513143
##   0.62626263  0.03244200  0.19722417  0.02512566
##   0.63636364  0.03243467  0.19750586  0.02512017
##   0.64646465  0.03242790  0.19776511  0.02511505
##   0.65656566  0.03242148  0.19801191  0.02511029
##   0.66666667  0.03241550  0.19824327  0.02510545
##   0.67676768  0.03240951  0.19848078  0.02510054
##   0.68686869  0.03240376  0.19871261  0.02509564
##   0.69696970  0.03239833  0.19893331  0.02509088
##   0.70707071  0.03239308  0.19915097  0.02508617
##   0.71717172  0.03238818  0.19935621  0.02508168
##   0.72727273  0.03238351  0.19955636  0.02507737
##   0.73737374  0.03237932  0.19973857  0.02507352
##   0.74747475  0.03237516  0.19992662  0.02506993
##   0.75757576  0.03237120  0.20010944  0.02506657
##   0.76767677  0.03236760  0.20028041  0.02506361
##   0.77777778  0.03236453  0.20043147  0.02506089
##   0.78787879  0.03236202  0.20056203  0.02505858
##   0.79797980  0.03236000  0.20067437  0.02505660
##   0.80808081  0.03235849  0.20076757  0.02505498
##   0.81818182  0.03235747  0.20084308  0.02505367
##   0.82828283  0.03235661  0.20091785  0.02505232
##   0.83838384  0.03235611  0.20098162  0.02505121
##   0.84848485  0.03235594  0.20103537  0.02505029
##   0.85858586  0.03235616  0.20107640  0.02504957
##   0.86868687  0.03235695  0.20109733  0.02504941
##   0.87878788  0.03235820  0.20110218  0.02504976
##   0.88888889  0.03235998  0.20108877  0.02505058
##   0.89898990  0.03236227  0.20105801  0.02505182
##   0.90909091  0.03236501  0.20101345  0.02505317
##   0.91919192  0.03236821  0.20095389  0.02505474
##   0.92929293  0.03237179  0.20088332  0.02505645
##   0.93939394  0.03237574  0.20080267  0.02505836
##   0.94949495  0.03238005  0.20071203  0.02506049
##   0.95959596  0.03238484  0.20060560  0.02506312
##   0.96969697  0.03239017  0.20048122  0.02506628
##   0.97979798  0.03239589  0.20034591  0.02506967
##   0.98989899  0.03240177  0.20021106  0.02507302
##   1.00000000  0.03240803  0.20006564  0.02507649
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.8484848.

##     fraction
## 85 0.8484848
## Warning: Removed 1 rows containing missing values (geom_point).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##           PC1           PC2           PC3           PC4           PC5           PC6           PC7           PC8 
## -4.410256e-03  4.928519e-03 -9.215452e-04  4.803960e-04  2.385848e-03  1.111594e-04 -1.723726e-03 -6.066736e-04 
##           PC9          PC10          PC11          PC12          PC13          PC14          PC15          PC16 
## -6.595839e-04  7.540574e-04 -4.476848e-03  1.040885e-03  1.216707e-03  4.802392e-03 -1.020253e-03  2.341349e-03 
##          PC17          PC18          PC19          PC20          PC21          PC22          PC23          PC24 
## -3.683588e-04  1.049358e-03 -7.984739e-04  2.371517e-03 -2.600206e-03  1.021933e-02 -5.320689e-04 -1.537817e-03 
##          PC25          PC26          PC27          PC28          PC29          PC30          PC31          PC32 
##  1.003121e-03 -3.971241e-04 -2.859889e-04  7.502361e-04 -6.881687e-04  6.032317e-04 -2.584945e-04  1.371130e-03 
##          PC33          PC36          PC37          PC38          PC39          PC40          PC41          PC42 
##  1.401775e-05  5.156979e-04 -2.793512e-04  3.350076e-04  2.308271e-04  1.976635e-04 -6.501462e-04 -4.799941e-04 
##          PC43          PC44          PC45          PC46          PC47          PC49          PC50          PC51 
##  6.506624e-04 -4.295708e-04  1.227854e-03  1.538914e-03  4.690743e-04  6.791953e-05 -8.384836e-04  4.587936e-04 
##          PC52          PC53          PC55          PC57          PC58          PC59          PC60          PC61 
##  2.978809e-04 -4.495925e-04  2.339435e-05  3.223448e-04 -4.643092e-04  1.239946e-03 -7.222865e-04 -1.536785e-04 
##          PC62          PC63          PC64          PC65          PC66          PC67          PC68          PC69 
## -4.696496e-04  5.845760e-04  6.484707e-05  8.160352e-04  4.954148e-05  2.784324e-04  8.247424e-04 -1.208095e-03 
##          PC71          PC72          PC73          PC75          PC76          PC78          PC79          PC80 
## -7.378094e-04  7.642804e-04  4.961340e-04  1.298233e-04  1.570179e-03 -2.580188e-04  7.264497e-04 -2.482595e-04 
##          PC81          PC82          PC83          PC85          PC86          PC87          PC88          PC89 
## -4.565037e-04 -1.043217e-03  1.112338e-03  2.800245e-04  4.374583e-04  4.637188e-04  3.059287e-04  1.285456e-04 
##          PC90          PC91          PC92          PC93          PC94          PC95          PC96          PC97 
## -2.340728e-03  2.780811e-04  1.582541e-04 -2.611301e-05  4.255402e-04  1.155077e-04 -7.699293e-04 -1.587765e-04 
##          PC98         PC101         PC102         PC103         PC104         PC105         PC106         PC107 
##  4.497164e-04 -8.835800e-04  1.331258e-03  8.767323e-06  5.217942e-04  1.096582e-04  8.191342e-04  2.616697e-04 
##         PC108         PC109         PC110         PC111         PC112         PC114         PC115         PC117 
## -1.090502e-03 -1.794619e-04 -2.778376e-04 -6.796458e-04  7.950152e-04 -1.589168e-04 -5.214930e-04 -1.563291e-03 
##         PC118         PC119         PC120         PC121         PC123         PC126         PC127         PC128 
## -8.096809e-04 -6.824433e-04 -3.983990e-04  2.589110e-04 -5.919343e-04  8.740385e-04  9.263851e-04 -3.064481e-04 
##         PC129         PC131         PC132         PC133         PC134         PC136         PC138 
## -5.978469e-04  9.995790e-04  1.359012e-03 -3.413935e-04 -1.358413e-04  4.816001e-04  9.585577e-04

Test: LARS (caret) model evaluation on the held-out test set

# Evaluate the trained LARS model on the held-out test set.
# Guarded by the algo.LARS.caret flag from the parameters block.
# isTRUE() replaces `== TRUE`: it is the idiomatic scalar-flag check and,
# unlike `if (x == TRUE)`, it safely skips the block (instead of erroring)
# if the flag is ever NA or NULL.
if (isTRUE(algo.LARS.caret)) {
  test.model(model.LARS.caret, data.test
             ,method = 'lars', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,draw.limits = TRUE, transformation = t)
  # NOTE(review): `transformation = t` presumably refers to a transformation
  # object defined earlier in the document, not base::t() (matrix transpose) —
  # verify upstream that `t` is bound as intended before this chunk runs.
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.037   2.086   2.098   2.097   2.109   2.154 
## [1] "lars  Test MSE: 0.00102574845474681"

Session Info

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2             knitr_1.20                 htmltools_0.3.6            reshape2_1.4.3            
##  [5] lars_1.2                   doParallel_1.0.14          iterators_1.0.10           caret_6.0-81              
##  [9] leaps_3.0                  ggforce_0.1.3              rlist_0.4.6.1              car_3.0-2                 
## [13] carData_3.0-2              bestNormalize_1.3.0        scales_1.0.0               onewaytests_2.0           
## [17] caTools_1.17.1.1           mosaic_1.5.0               mosaicData_0.17.0          ggformula_0.9.1           
## [21] ggstance_0.3.1             lattice_0.20-35            DT_0.5                     ggiraph_0.6.0             
## [25] investr_1.4.0              glmnet_2.0-16              foreach_1.4.4              Matrix_1.2-14             
## [29] MASS_7.3-50                PerformanceAnalytics_1.5.2 xts_0.11-2                 zoo_1.8-4                 
## [33] forcats_0.3.0              stringr_1.3.1              dplyr_0.7.8                purrr_0.2.5               
## [37] readr_1.3.1                tidyr_0.8.2                tibble_1.4.2               ggplot2_3.1.0             
## [41] tidyverse_1.2.1            usdm_1.1-18                raster_2.8-4               sp_1.3-1                  
## [45] pacman_0.5.0              
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.2.0       backports_1.1.3    plyr_1.8.4         lazyeval_0.2.1     splines_3.5.1      mycor_0.1.1       
##  [7] crosstalk_1.0.0    leaflet_2.0.2      digest_0.6.18      magrittr_1.5       mosaicCore_0.6.0   openxlsx_4.1.0    
## [13] recipes_0.1.4      modelr_0.1.2       gower_0.1.2        colorspace_1.3-2   rvest_0.3.2        ggrepel_0.8.0     
## [19] haven_2.0.0        crayon_1.3.4       jsonlite_1.5       bindr_0.1.1        survival_2.42-3    glue_1.3.0        
## [25] registry_0.5       gtable_0.2.0       ppcor_1.1          ipred_0.9-8        abind_1.4-5        rngtools_1.3.1    
## [31] bibtex_0.4.2       Rcpp_1.0.0         xtable_1.8-3       units_0.6-2        foreign_0.8-70     stats4_3.5.1      
## [37] lava_1.6.4         prodlim_2018.04.18 htmlwidgets_1.3    httr_1.4.0         RColorBrewer_1.1-2 pkgconfig_2.0.2   
## [43] farver_1.1.0       nnet_7.3-12        labeling_0.3       tidyselect_0.2.5   rlang_0.3.1        later_0.7.5       
## [49] munsell_0.5.0      cellranger_1.1.0   tools_3.5.1        cli_1.0.1          generics_0.0.2     moments_0.14      
## [55] sjlabelled_1.0.17  broom_0.5.1        evaluate_0.12      ggdendro_0.1-20    yaml_2.2.0         ModelMetrics_1.2.2
## [61] zip_2.0.1          nlme_3.1-137       doRNG_1.7.1        mime_0.6           xml2_1.2.0         compiler_3.5.1    
## [67] rstudioapi_0.8     curl_3.2           tweenr_1.0.1       stringi_1.2.4      gdtools_0.1.7      pillar_1.3.1      
## [73] data.table_1.11.8  bitops_1.0-6       insight_0.1.2      httpuv_1.4.5       R6_2.3.0           promises_1.0.1    
## [79] gridExtra_2.3      rio_0.5.16         codetools_0.2-15   assertthat_0.2.0   pkgmaker_0.27      withr_2.1.2       
## [85] nortest_1.0-4      mgcv_1.8-24        hms_0.4.2          quadprog_1.5-5     grid_3.5.1         rpart_4.1-13      
## [91] timeDate_3043.102  class_7.3-14       rmarkdown_1.11     shiny_1.2.0        lubridate_1.7.4